]> rtime.felk.cvut.cz Git - opencv.git/commitdiff
one more speed improvement in StereoBM
author vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
Sun, 21 Feb 2010 02:25:25 +0000 (02:25 +0000)
committer vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
Sun, 21 Feb 2010 02:25:25 +0000 (02:25 +0000)
git-svn-id: https://code.ros.org/svn/opencv/trunk@2709 73c94f0f-984f-4a5f-82bc-2d8db8d8ee08

opencv/src/cv/cvstereobm.cpp

index a9a185229d649c68b334a5c2f3ae64e00ef4112a..8e092273325a3505d60e2b5640c9b04c308432c3 100644 (file)
@@ -291,11 +291,18 @@ static void icvFindStereoCorrespondenceBM_SSE2( const CvMat* left, const CvMat*
         for( y = -dy0; y < height + dy1; y++, hsad += ndisp, cbuf += ndisp, lptr += sstep, rptr += sstep )\r
         {\r
             int lval = lptr[0];\r
-            for( d = 0; d < ndisp; d++ )\r
+            __m128i lv = _mm_set1_epi8((char)lval), z = _mm_setzero_si128();\r
+            for( d = 0; d < ndisp; d += 16 )\r
             {\r
-                int diff = abs(lval - rptr[d]);\r
-                cbuf[d] = (uchar)diff;\r
-                hsad[d] = (ushort)(hsad[d] + diff);\r
+                __m128i rv = _mm_loadu_si128((const __m128i*)(rptr + d));\r
+                __m128i hsad_l = _mm_load_si128((__m128i*)(hsad + d));\r
+                __m128i hsad_h = _mm_load_si128((__m128i*)(hsad + d + 8));\r
+                __m128i diff = _mm_adds_epu8(_mm_subs_epu8(lv, rv), _mm_subs_epu8(rv, lv));\r
+                _mm_store_si128((__m128i*)(cbuf + d), diff);\r
+                hsad_l = _mm_add_epi16(hsad_l, _mm_unpacklo_epi8(diff,z));\r
+                hsad_h = _mm_add_epi16(hsad_h, _mm_unpackhi_epi8(diff,z));\r
+                _mm_store_si128((__m128i*)(hsad + d), hsad_l);\r
+                _mm_store_si128((__m128i*)(hsad + d + 8), hsad_h);\r
             }\r
             htext[y] += tab[lval];\r
         }\r