rtime.felk.cvut.cz Git - opencv.git/blobdiff - opencv/src/cv/cvstereosgbm.cpp
renamed all the _[A-Z] variables to avoid possible name conflicts.
[opencv.git] / opencv / src / cv / cvstereosgbm.cpp
index 267b75f7f053297a66d1a3a125b3b3c6ab2f3e11..bbd6ea83cabf9023b856e00e3cbb3350986e61db 100644 (file)
@@ -110,47 +110,57 @@ StereoSGBM::~StereoSGBM()
  */
 static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
                             int minD, int maxD, CostType* cost,
-                            PixType* buffer, const PixType* tab, int tabOfs )
+                            PixType* buffer, const PixType* tab,
+                            int tabOfs, int ftzero )
 {
     int x, c, width = img1.cols, cn = img1.channels();
     int minX1 = max(maxD, 0), maxX1 = width + min(minD, 0);
     int minX2 = max(minX1 - maxD, 0), maxX2 = min(maxX1 - minD, width);
     int D = maxD - minD, width1 = maxX1 - minX1, width2 = maxX2 - minX2;
     const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
-    PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn;
+    PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
     
     tab += tabOfs;
     
-    for( c = 0; c < cn; c++ )
+    for( c = 0; c < cn*2; c++ )
     {
         prow1[width*c] = prow1[width*c + width-1] = 
         prow2[width*c] = prow2[width*c + width-1] = tab[0];
     }
     
+    int n1 = y > 0 ? -(int)img1.step : 0, s1 = y < img1.rows-1 ? (int)img1.step : 0;
+    int n2 = y > 0 ? -(int)img2.step : 0, s2 = y < img2.rows-1 ? (int)img2.step : 0;
+    
     if( cn == 1 )
     {
-        int n1 = y > 0 ? -img1.step : 0, s1 = y < img1.rows-1 ? img1.step : 0;
-        int n2 = y > 0 ? -img2.step : 0, s2 = y < img2.rows-1 ? img2.step : 0;
-        
         for( x = 1; x < width-1; x++ )
         {
-            //prow1[x] = tab[row1[x+1] - row1[x-1]];
-            //prow2[width-1-x] = tab[row2[x+1] - row2[x-1]];
             prow1[x] = tab[(row1[x+1] - row1[x-1])*2 + row1[x+n1+1] - row1[x+n1-1] + row1[x+s1+1] - row1[x+s1-1]];
             prow2[width-1-x] = tab[(row2[x+1] - row2[x-1])*2 + row2[x+n2+1] - row2[x+n2-1] + row2[x+s2+1] - row2[x+s2-1]];
+            
+            prow1[x+width] = row1[x];
+            prow2[width-1-x+width] = row2[x];
         }
     }
     else
     {
         for( x = 1; x < width-1; x++ )
         {
-            prow1[x] = tab[row1[x*3+3] - row1[x*3-3]];
-            prow1[x+width] = tab[row1[x*3+4] - row1[x*3-2]];
-            prow1[x+width*2] = tab[row1[x*3+5] - row1[x*3-1]];
+            prow1[x] = tab[(row1[x*3+3] - row1[x*3-3])*2 + row1[x*3+n1+3] - row1[x*3+n1-3] + row1[x*3+s1+3] - row1[x*3+s1-3]];
+            prow1[x+width] = tab[(row1[x*3+4] - row1[x*3-2])*2 + row1[x*3+n1+4] - row1[x*3+n1-2] + row1[x*3+s1+4] - row1[x*3+s1-2]];
+            prow1[x+width*2] = tab[(row1[x*3+5] - row1[x*3-1])*2 + row1[x*3+n1+5] - row1[x*3+n1-1] + row1[x*3+s1+5] - row1[x*3+s1-1]];
+            
+            prow2[width-1-x] = tab[(row2[x*3+3] - row2[x*3-3])*2 + row2[x*3+n2+3] - row2[x*3+n2-3] + row2[x*3+s2+3] - row2[x*3+s2-3]];
+            prow2[width-1-x+width] = tab[(row2[x*3+4] - row2[x*3-2])*2 + row2[x*3+n2+4] - row2[x*3+n2-2] + row2[x*3+s2+4] - row2[x*3+s2-2]];
+            prow2[width-1-x+width*2] = tab[(row2[x*3+5] - row2[x*3-1])*2 + row2[x*3+n2+5] - row2[x*3+n2-1] + row2[x*3+s2+5] - row2[x*3+s2-1]];
             
-            prow2[width-1-x] = tab[row2[x*3+3] - row2[x*3-3]];
-            prow2[width-1-x+width] = tab[row2[x*3+4] - row2[x*3-2]];
-            prow2[width-1-x+width*2] = tab[row2[x*3+5] - row2[x*3-1]];
+            prow1[x+width*3] = row1[x*3];
+            prow1[x+width*4] = row1[x*3+1];
+            prow1[x+width*5] = row1[x*3+2];
+            
+            prow2[width-1-x+width*3] = row2[x*3];
+            prow2[width-1-x+width*4] = row2[x*3+1];
+            prow2[width-1-x+width*5] = row2[x*3+2];
         }
     }
     
@@ -159,11 +169,15 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
     buffer -= minX2;
     cost -= minX1*D + minD; // simplify the cost indices inside the loop
     
+#if CV_SSE2    
     volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
+#endif
     
 #if 1    
-    for( c = 0; c < cn; c++, prow1 += width, prow2 += width )
+    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
     {
+        int diff_scale = c < cn ? 0 : 2;
+        
         // precompute
         //   v0 = min(row2[x-1/2], row2[x], row2[x+1/2]) and
         //   v1 = max(row2[x-1/2], row2[x], row2[x+1/2]) and
@@ -189,8 +203,9 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
         #if CV_SSE2
             if( useSIMD )
             {
-                __m128i _u = _mm_set1_epi8(u), _u0 = _mm_set1_epi8(u0);
-                __m128i _u1 = _mm_set1_epi8(u1), z = _mm_setzero_si128();
+                __m128i _u = _mm_set1_epi8((char)u), _u0 = _mm_set1_epi8((char)u0);
+                __m128i _u1 = _mm_set1_epi8((char)u1), z = _mm_setzero_si128();
+                __m128i ds = _mm_cvtsi32_si128(diff_scale);
                 
                 for( int d = minD; d < maxD; d += 16 )
                 {
@@ -204,8 +219,8 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
                     c0 = _mm_load_si128((__m128i*)(cost + x*D + d));
                     c1 = _mm_load_si128((__m128i*)(cost + x*D + d + 8));
                     
-                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_unpacklo_epi8(diff,z)));
-                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_unpackhi_epi8(diff,z)));
+                    _mm_store_si128((__m128i*)(cost + x*D + d), _mm_adds_epi16(c0, _mm_srl_epi16(_mm_unpacklo_epi8(diff,z), ds)));
+                    _mm_store_si128((__m128i*)(cost + x*D + d + 8), _mm_adds_epi16(c1, _mm_srl_epi16(_mm_unpackhi_epi8(diff,z), ds)));
                 }
             }
             else
@@ -219,13 +234,13 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
                     int c0 = max(0, u - v1); c0 = max(c0, v0 - u);
                     int c1 = max(0, v - u1); c1 = max(c1, u0 - v);
                     
-                    cost[x*D + d] = (CostType)(cost[x*D+d] + min(c0, c1));
+                    cost[x*D + d] = (CostType)(cost[x*D+d] + (min(c0, c1) >> diff_scale));
                 }
             }
         }
     }
 #else
-    for( c = 0; c < cn; c++, prow1 += width, prow2 += width )
+    for( c = 0; c < cn*2; c++, prow1 += width, prow2 += width )
     {
         for( x = minX1; x < maxX1; x++ )
         {
@@ -252,7 +267,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
                 for( int d = minD; d < maxD; d++ )
                 {
                     int v = prow2[width-1-x + d];
-                    cost[x*D + d] = (CostType)(cost[x*D + d] + std::abs(u - v));
+                    cost[x*D + d] = (CostType)(cost[x*D + d] + (CostType)std::abs(u - v));
                 }
             }
         }
@@ -297,6 +312,8 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
         6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
         5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
     };
+    
+    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
 #endif    
     
     const int ALIGN = 16;
@@ -351,7 +368,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
     size_t totalBufSize = (LrSize + minLrSize)*NLR*sizeof(CostType) + // minLr[] and Lr[]
     costBufSize*(hsumBufNRows + 1)*sizeof(CostType) + // hsumBuf, pixdiff
     CSBufSize*2*sizeof(CostType) + // C, S
-    width*8*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
+    width*16*img1.channels()*sizeof(PixType) + // temp buffer for computing per-pixel cost
     width*(sizeof(CostType) + sizeof(DispType)) + 1024; // disp2cost + disp2
     
     if( !buffer.data || !buffer.isContinuous() ||
@@ -368,11 +385,9 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
     DispType* disp2ptr = (DispType*)(disp2cost + width);
     PixType* tempBuf = (PixType*)(disp2ptr + width);
     
-    volatile bool useSIMD = checkHardwareSupport(CV_CPU_SSE2);
-    
     // add P2 to every C(x,y). it saves a few operations in the inner loops
     for( k = 0; k < width1*D; k++ )
-        Cbuf[k] = P2;
+        Cbuf[k] = (CostType)P2;
     
     for( int pass = 1; pass <= npasses; pass++ )
     {
@@ -421,7 +436,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
                     
                     if( k < height )
                     {
-                        calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS );
+                        calcPixelCostBT( img1, img2, k, minD, maxD, pixDiff, tempBuf, clipTab, TAB_OFS, ftzero );
                         
                         memset(hsumAdd, 0, D*sizeof(CostType));
                         for( x = 0; x <= SW2*D; x += D )
@@ -645,15 +660,14 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
             {
                 for( x = 0; x < width; x++ )
                 {
-                    disp1ptr[x] = disp2ptr[x] = INVALID_DISP_SCALED;
+                    disp1ptr[x] = disp2ptr[x] = (DispType)INVALID_DISP_SCALED;
                     disp2cost[x] = MAX_COST;
                 }
                 
                 for( x = width1 - 1; x >= 0; x-- )
                 {
                     CostType* Sp = S + x*D;
-                    CostType minS = MAX_COST;
-                    int bestDisp = -1;
+                    int minS = MAX_COST, bestDisp = -1;
                     
                     if( npasses == 1 )
                     {
@@ -705,16 +719,16 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 4));
                             _minL0 = _mm_min_epi16(_minL0, _mm_srli_si128(_minL0, 2));
                             
-                            __m128i _S = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
-                            _S = _mm_min_epi16(_S, _mm_srli_si128(_S, 4));
-                            _S = _mm_min_epi16(_S, _mm_srli_si128(_S, 2));
+                            __m128i qS = _mm_min_epi16(_minS, _mm_srli_si128(_minS, 8));
+                            qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 4));
+                            qS = _mm_min_epi16(qS, _mm_srli_si128(qS, 2));
                             
                             minLr[0][xm] = (CostType)_mm_cvtsi128_si32(_minL0);
-                            minS = (CostType)_mm_cvtsi128_si32(_S);
+                            minS = (CostType)_mm_cvtsi128_si32(qS);
                             
-                            _S = _mm_shuffle_epi32(_mm_unpacklo_epi16(_S, _S), 0);
-                            _S = _mm_cmpeq_epi16(_minS, _S);
-                            int idx = _mm_movemask_epi8(_mm_packs_epi16(_S, _S)) & 255;
+                            qS = _mm_shuffle_epi32(_mm_unpacklo_epi16(qS, qS), 0);
+                            qS = _mm_cmpeq_epi16(_minS, qS);
+                            int idx = _mm_movemask_epi8(_mm_packs_epi16(qS, qS)) & 255;
                             
                             bestDisp = bestDispBuf[LSBTab[idx]];
                         }
@@ -840,13 +854,13 @@ void filterSpeckles( Mat& img, double _newval, int maxSpeckleSize, double _maxDi
                 if( ls[j] )            // has a label, check for bad label
                 {  
                     if( rtype[ls[j]] ) // small region, zero out disparity
-                        ds[j] = newVal;
+                        ds[j] = (short)newVal;
                 }
                 // no label, assign and propagate
                 else
                 {
                     Point2s* ws = wbuf;        // initialize wavefront
-                    Point2s p(j, i);   // current pixel
+                    Point2s p((short)j, (short)i);     // current pixel
                     curlabel++;        // next label
                     int count = 0;     // current region size
                     ls[j] = curlabel;
@@ -893,7 +907,7 @@ void filterSpeckles( Mat& img, double _newval, int maxSpeckleSize, double _maxDi
                     if( count <= maxSpeckleSize )      // speckle region
                     {
                         rtype[ls[j]] = 1;      // small region label
-                        ds[j] = newVal;
+                        ds[j] = (short)newVal;
                     }
                     else
                         rtype[ls[j]] = 0;      // large region label