rtime.felk.cvut.cz Git - opencv.git/commitdiff
converted several OpenMP loops into TBB's parallel_for()/parallel_do().
author vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
Wed, 24 Mar 2010 13:52:24 +0000 (13:52 +0000)
committer vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
Wed, 24 Mar 2010 13:52:24 +0000 (13:52 +0000)
git-svn-id: https://code.ros.org/svn/opencv/trunk@2893 73c94f0f-984f-4a5f-82bc-2d8db8d8ee08

opencv/3rdparty/libtiff/CMakeLists.txt
opencv/CMakeLists.txt
opencv/include/opencv/cv.hpp
opencv/src/cv/_cv.h
opencv/src/cv/cvcascadedetect.cpp
opencv/src/cv/cvdistransform.cpp
opencv/src/cv/cvhaar.cpp
opencv/src/cv/cvstereobm.cpp
opencv/src/cv/cvsurf.cpp
opencv/src/highgui/window.cpp
opencv/src/highgui/window_cocoa.mm

index 1b2c60c0d3ee33379e6ef6ec24f9584cc3a40485..8e2688f5199d58c59ef099456d0c325e9e47b7ce 100644 (file)
@@ -11,6 +11,8 @@ include_directories(
        "${CMAKE_CURRENT_SOURCE_DIR}/../include"
        )
 
+add_definitions(-DHAVE_STRING_H=1)
+
 # The .cpp files:
 
 set(lib_srcs
index 367a09563e80a859819574c4ac903f7b187ebe8d..bdac3c78ffc3382ab5c997ca3400ad18a6621f66 100644 (file)
@@ -247,9 +247,9 @@ if(CMAKE_COMPILER_IS_GNUCXX)
         set(USE_FAST_MATH ON CACHE BOOL "Enable -ffast-math for GCC")\r
         set(ENABLE_SSE ON CACHE BOOL "Enable SSE for GCC")\r
         set(ENABLE_SSE2 ON CACHE BOOL "Enable SSE2 for GCC")\r
-        set(ENABLE_SSE3 ON CACHE BOOL "Enable SSE3 for GCC")\r
-        set(ENABLE_SSSE3 ON CACHE BOOL "Enable SSSE3 for GCC")\r
-        #set(ENABLE_SSE4_1 ON CACHE BOOL "Enable SSE4.1 for GCC")\r
+        set(ENABLE_SSE3 OFF CACHE BOOL "Enable SSE3 for GCC")\r
+        set(ENABLE_SSSE3 OFF CACHE BOOL "Enable SSSE3 for GCC")\r
+        #set(ENABLE_SSE4_1 OFF CACHE BOOL "Enable SSE4.1 for GCC")\r
     endif()\r
 endif()\r
 \r
@@ -495,34 +495,19 @@ endif()
 \r
 if (WITH_TBB)\r
     if (WIN32 OR APPLE)\r
-        include(OpenCVFindTBB.cmake)\r
-        if (TBB_FOUND)\r
-            set(HAVE_TBB 1)\r
-\r
-            if(NOT "${TBB_INCLUDE_DIRS}" STREQUAL "")\r
-                include_directories("${TBB_INCLUDE_DIRS}")\r
-            endif()\r
-\r
-            link_directories("${TBB_LIBRARY_DIRS}")\r
-            if("${CMAKE_BUILD_TYPE}" STREQUAL "Debug")\r
-                set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${TBB_DEBUG_LIBRARIES})\r
+        find_path(TBB_INCLUDE_DIRS "tbb/tbb.h"\r
+                    PATHS "/usr/include" "/usr/local/include"\r
+                    DOC "The path to TBB headers")\r
+        \r
+        if (TBB_INCLUDE_DIRS)\r
+            if (WIN32)\r
+                set(TBB_LIB "${TBB_INCLUDE_DIRS}/../lib/tbb.lib" CACHE PATH "Full name of TBB library")\r
             else()\r
-                set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${TBB_LIBRARIES})\r
+                set(TBB_LIB "${TBB_INCLUDE_DIRS}/../lib/libtbb.dylib" CACHE PATH "Full name of TBB library")\r
             endif()\r
-        endif()\r
-    else()\r
-        PKG_CHECK_MODULES(TBB tbb)\r
-        message(STATUS "TBB detected: ${TBBLIB_FOUND}")\r
-\r
-        if (TBB_FOUND)\r
             set(HAVE_TBB 1)\r
-\r
-            if(NOT "${TBB_INCLUDE_DIRS}" STREQUAL "")\r
-                include_directories("${TBB_INCLUDE_DIRS}")\r
-            endif()\r
-\r
-            link_directories("${TBB_LIBRARY_DIRS}")\r
-            set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${TBB_LIBRARIES})\r
+            include_directories("${TBB_INCLUDE_DIRS}")\r
+            set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} ${TBB_LIB})\r
         endif()\r
     endif()\r
 endif()\r
@@ -640,9 +625,6 @@ include_directories("."
 # Should be set to true for development\r
 set(OPENCV_WARNINGS_ARE_ERRORS OFF CACHE BOOL "Treat warnings as errors")\r
 \r
-# Whole program optimization?\r
-set(OPENCV_WHOLE_PROGRAM_OPTIMIZATION OFF CACHE BOOL "Flags for whole program optimization.")\r
-\r
 set(EXTRA_C_FLAGS "")\r
 set(EXTRA_C_FLAGS_RELEASE "")\r
 set(EXTRA_C_FLAGS_DEBUG "")\r
@@ -662,13 +644,6 @@ if(MSVC)
 \r
     set(EXTRA_EXE_LINKER_FLAGS_RELEASE "${EXTRA_EXE_LINKER_FLAGS_RELEASE} /debug")\r
 \r
-    # Whole program optimization\r
-    if(OPENCV_WHOLE_PROGRAM_OPTIMIZATION AND MSVC_VERSION GREATER 1300)\r
-        set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} /GL")\r
-        set(CMAKE_EXE_LINKER_FLAGS_RELEASE "${CMAKE_EXE_LINKER_FLAGS_RELEASE} /LTCG")\r
-        set(CMAKE_MODULE_LINKER_FLAGS_RELEASE "${CMAKE_MODULE_LINKER_FLAGS_RELEASE} /LTCG")\r
-    endif()\r
-\r
     if(ENABLE_OPENMP)\r
         set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} /openmp")\r
     endif()\r
@@ -706,11 +681,6 @@ if(CMAKE_COMPILER_IS_GNUCXX)
         set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -Werror")\r
     endif()\r
 \r
-    # Whole program optimization\r
-    if(OPENCV_WHOLE_PROGRAM_OPTIMIZATION)\r
-        set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -fwhole-program --combine")\r
-    endif()\r
-\r
     # Other optimizations\r
     if(USE_OMIT_FRAME_POINTER)\r
        set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -fomit-frame-pointer")\r
index bf56064dfaa1677c5c6b8ce808bc26adca0b33c7..3de1c9f871356e99a333ec56286d0ff54a7eef30 100644 (file)
@@ -621,6 +621,7 @@ public:
 ///////////////////////////// Object Detection ////////////////////////////
 
 CV_EXPORTS void groupRectangles(vector<Rect>& rectList, int groupThreshold, double eps=0.2);
+CV_EXPORTS void groupRectangles(vector<Rect>& rectList, vector<int>& weights, int groupThreshold, double eps=0.2);
         
 class CV_EXPORTS FeatureEvaluator
 {
index 665fbbe4be310507b186580384c8b8461fd1df78..62cb5144ffd88cc6004f76e34d18f7abd5efe538 100644 (file)
@@ -184,6 +184,8 @@ CvPyramid;
         {\r
             tbb::parallel_do(first, last, body);\r
         }\r
+        \r
+        typedef tbb::concurrent_vector<Rect> ConcurrentRectVector;\r
     }\r
 #else\r
     namespace cv\r
@@ -213,6 +215,8 @@ CvPyramid;
             for( ; first != last; ++first )\r
                 body(*first);\r
         }\r
+        \r
+        typedef std::vector<Rect> ConcurrentRectVector;\r
     }\r
 #endif\r
 \r
index 6db297d145fa7b6f87eec484a218846cc82b3a15..1ac3cc7c31ec4373652878780709ab0c339fcede 100755 (executable)
 #include "_cv.h"
 #include <cstdio>
 
-#ifdef _OPENMP
-#include "omp.h"
-#endif
-
 namespace cv
 {
 
@@ -66,16 +62,27 @@ public:
     double eps;
 };    
     
-void groupRectangles(vector<Rect>& rectList, int groupThreshold, double eps)
+
+static void groupRectangles(vector<Rect>& rectList, int groupThreshold, double eps, vector<int>* weights)
 {
     if( groupThreshold <= 0 || rectList.empty() )
+    {
+        if( weights )
+        {
+            size_t i, sz = rectList.size();
+            weights->resize(sz);
+            for( i = 0; i < sz; i++ )
+                (*weights)[i] = 1;
+        }
         return;
+    }
     
     vector<int> labels;
     int nclasses = partition(rectList, labels, SimilarRects(eps));
+    
     vector<Rect> rrects(nclasses);
     vector<int> rweights(nclasses, 0);
-    int i, nlabels = (int)labels.size();
+    int i, j, nlabels = (int)labels.size();
     for( i = 0; i < nlabels; i++ )
     {
         int cls = labels[i];
@@ -85,19 +92,68 @@ void groupRectangles(vector<Rect>& rectList, int groupThreshold, double eps)
         rrects[cls].height += rectList[i].height;
         rweights[cls]++;
     }
-    rectList.clear();
+    
     for( i = 0; i < nclasses; i++ )
     {
         Rect r = rrects[i];
-        if( rweights[i] <= groupThreshold )
-            continue;
         float s = 1.f/rweights[i];
-        rectList.push_back(Rect(saturate_cast<int>(r.x*s),
-                                saturate_cast<int>(r.y*s),
-                                saturate_cast<int>(r.width*s),
-                                saturate_cast<int>(r.height*s)));
+        rrects[i] = Rect(saturate_cast<int>(r.x*s),
+             saturate_cast<int>(r.y*s),
+             saturate_cast<int>(r.width*s),
+             saturate_cast<int>(r.height*s));
+    }
+    
+    rectList.clear();
+    if( weights )
+        weights->clear();
+    
+    for( i = 0; i < nclasses; i++ )
+    {
+        Rect r1 = rrects[i];
+        int n1 = rweights[i];
+        if( n1 <= groupThreshold )
+            continue;
+        // filter out small face rectangles inside large rectangles
+        for( j = 0; j < nclasses; j++ )
+        {
+            int n2 = rweights[j];
+            
+            if( j == i || n2 <= groupThreshold )
+                continue;
+            Rect r2 = rrects[j];
+            
+            int dx = saturate_cast<int>( r2.width * eps );
+            int dy = saturate_cast<int>( r2.height * eps );
+            
+            if( i != j &&
+                r1.x >= r2.x - dx &&
+                r1.y >= r2.y - dy &&
+                r1.x + r1.width <= r2.x + r2.width + dx &&
+                r1.y + r1.height <= r2.y + r2.height + dy &&
+                (n2 > std::max(3, n1) || n1 < 3) )
+                break;
+        }
+        
+        if( j == nclasses )
+        {
+            rectList.push_back(r1);
+            if( weights )
+                weights->push_back(n1);
+        }
     }
 }
+
+
+void groupRectangles(vector<Rect>& rectList, int groupThreshold, double eps)
+{
+    groupRectangles(rectList, groupThreshold, eps, 0);
+}
+    
+void groupRectangles(vector<Rect>& rectList, vector<int>& weights, int groupThreshold, double eps)
+{
+    groupRectangles(rectList, groupThreshold, eps, &weights);
+}
+
     
 #define CC_CASCADE_PARAMS "cascadeParams"
 #define CC_STAGE_TYPE     "stageType"
@@ -750,6 +806,44 @@ bool CascadeClassifier::setImage( Ptr<FeatureEvaluator> &_feval, const Mat& imag
     return empty() ? false : _feval->setImage(image, origWinSize );
 }
     
+struct CascadeClassifierInvoker
+{
+    CascadeClassifierInvoker( CascadeClassifier& _cc, Size _sz1, int _stripSize, int _yStep, double _factor, ConcurrentRectVector& _vec )
+    {
+        cc = &_cc;
+        sz1 = _sz1;
+        stripSize = _stripSize;
+        yStep = _yStep;
+        factor = _factor;
+        vec = &_vec;
+    }
+    
+    void operator()(const BlockedRange& range) const
+    {
+        Ptr<FeatureEvaluator> feval = cc->feval->clone();
+        int y1 = range.begin()*stripSize, y2 = min(range.end()*stripSize, sz1.height);
+        Size winSize(cvRound(cc->origWinSize.width*factor), cvRound(cc->origWinSize.height*factor));
+            
+        for( int y = y1; y < y2; y += yStep )
+            for( int x = 0; x < sz1.width; x += yStep )
+            {
+                int r = cc->runAt(feval, Point(x, y));
+                if( r > 0 )
+                    vec->push_back(Rect(cvRound(x*factor), cvRound(y*factor),
+                                        winSize.width, winSize.height));
+                if( r == 0 )
+                    x += yStep;
+            }
+    }
+    
+    CascadeClassifier* cc;
+    Size sz1;
+    int stripSize, yStep;
+    double factor;
+    ConcurrentRectVector* vec;
+};
+    
     
 struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } };
 
@@ -757,6 +851,8 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& object
                                           double scaleFactor, int minNeighbors,
                                           int flags, Size minSize )
 {
+    const double GROUP_EPS = 0.2;
+    
     CV_Assert( scaleFactor > 1 && image.depth() == CV_8U );
     
     if( empty() )
@@ -786,16 +882,7 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& object
         img = temp;
     }
     
-    int maxNumThreads = 1;
-#ifdef _OPENMP
-       maxNumThreads = omp_get_num_procs();
-#endif
-
-    vector<vector<Rect> > rects( maxNumThreads );
-    vector<Rect>* rectsPtr = &rects[0];
-    vector<Ptr<FeatureEvaluator> > fevals( maxNumThreads );
-    fevals[0] = feval;
-    Ptr<FeatureEvaluator>* fevalsPtr = &fevals[0];
+    ConcurrentRectVector allCandidates;
 
     for( double factor = 1; ; factor *= scaleFactor )
     {
@@ -810,54 +897,27 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector<Rect>& object
             continue;
         
         int yStep = factor > 2. ? 1 : 2;
-        if( maxNumThreads > 1 )
-        {
-            stripCount = max(min(sz1.height/yStep, maxNumThreads*3), 1);
-            stripSize = (sz1.height + stripCount - 1)/stripCount;
-            stripSize = (stripSize/yStep)*yStep;
-        }
-        else
-        {
-            stripCount = 1;
-            stripSize = sz1.height;
-        }
+    #ifdef HAVE_TBB
+        const int PTS_PER_THREAD = 100;
+        stripCount = max(((sz1.height*sz1.width)/(yStep*yStep) + PTS_PER_THREAD/2)/PTS_PER_THREAD, 1);
+        stripSize = (sz1.height + stripCount - 1)/stripCount;
+        stripSize = (stripSize/yStep)*yStep;
+    #else
+        stripCount = 1;
+        stripSize = sz1.height;
+    #endif
 
         Mat img1( sz, CV_8U, imgbuf.data );
         resize( img, img1, sz, 0, 0, CV_INTER_LINEAR );
         if( !feval->setImage( img1, origWinSize ) )
             break;
-        for( int i = 1; i < maxNumThreads; i++ )
-            fevalsPtr[i] = feval->clone();
-        
-#ifdef _OPENMP
-#pragma omp parallel for num_threads(maxNumThreads) schedule(dynamic)
-#endif
-        for( int i = 0; i < stripCount; i++ )
-        {
-                       int threadIdx = 0;
-#ifdef _OPENMP
-                       threadIdx = omp_get_thread_num();
-#endif
-            int y1 = i*stripSize, y2 = (i+1)*stripSize;
-            if( i == stripCount - 1 || y2 > sz1.height )
-                y2 = sz1.height;
-            Size ssz(sz1.width, y2 - y1);
-
-            for( int y = y1; y < y2; y += yStep )
-                for( int x = 0; x < ssz.width; x += yStep )
-                {
-                    int r = runAt(fevalsPtr[threadIdx], Point(x,y));
-                    if( r > 0 )
-                        rectsPtr[threadIdx].push_back(Rect(cvRound(x*factor), cvRound(y*factor),
-                                               winSize.width, winSize.height));
-                    else if( r == 0 )
-                        x += yStep;
-                }
-        }
+        parallel_for(BlockedRange(0, stripCount), CascadeClassifierInvoker(*this, sz1, stripSize, yStep, factor, allCandidates));
     }
-    for( vector< vector<Rect> >::const_iterator it = rects.begin(); it != rects.end(); it++ )
-        objects.insert( objects.end(), it->begin(), it->end() );
-    groupRectangles( objects, minNeighbors, 0.2 );
+    
+    objects.resize(allCandidates.size());
+    std::copy(allCandidates.begin(), allCandidates.end(), objects.begin());
+    groupRectangles( objects, minNeighbors, GROUP_EPS );
 }    
 
     
index 6f6b11fb0ca422f81542164c23fc18cce845967d..8f80f2ed8452707713e3b5e898244f2ff9c702d3 100644 (file)
@@ -440,18 +440,127 @@ icvGetDistanceTransformMask( int maskType, float *metrics )
     return CV_OK;\r
 }\r
 \r
+namespace cv\r
+{\r
+\r
+struct DTColumnInvoker\r
+{\r
+    DTColumnInvoker( const CvMat* _src, CvMat* _dst, const int* _sat_tab, const float* _sqr_tab)\r
+    {\r
+        src = _src;\r
+        dst = _dst;\r
+        sat_tab = _sat_tab + src->rows*2 + 1;\r
+        sqr_tab = _sqr_tab;\r
+    }\r
+    \r
+    void operator()( const BlockedRange& range ) const\r
+    {\r
+        int i, i1 = range.begin(), i2 = range.end();\r
+        int m = src->rows;\r
+        size_t sstep = src->step, dstep = dst->step/sizeof(float);\r
+        AutoBuffer<int> _d(m);\r
+        int* d = _d;\r
+        \r
+        for( i = i1; i < i2; i++ )\r
+        {\r
+            const uchar* sptr = src->data.ptr + i + (m-1)*sstep;\r
+            float* dptr = dst->data.fl + i;\r
+            int j, dist = m-1;\r
+            \r
+            for( j = m-1; j >= 0; j--, sptr -= sstep )\r
+            {\r
+                dist = (dist + 1) & (sptr[0] == 0 ? 0 : -1);\r
+                d[j] = dist;\r
+            }\r
+            \r
+            dist = m-1;\r
+            for( j = 0; j < m; j++, dptr += dstep )\r
+            {\r
+                dist = dist + 1 - sat_tab[dist - d[j]];\r
+                d[j] = dist;\r
+                dptr[0] = sqr_tab[dist];\r
+            }\r
+        }\r
+    }\r
+    \r
+    const CvMat* src;\r
+    CvMat* dst;\r
+    const int* sat_tab;\r
+    const float* sqr_tab;\r
+};\r
+    \r
+    \r
+struct DTRowInvoker\r
+{\r
+    DTRowInvoker( CvMat* _dst, const float* _sqr_tab, const float* _inv_tab )\r
+    {\r
+        dst = _dst;\r
+        sqr_tab = _sqr_tab;\r
+        inv_tab = _inv_tab;\r
+    }\r
+    \r
+    void operator()( const BlockedRange& range ) const\r
+    {\r
+        const float inf = 1e6f;\r
+        int i, i1 = range.begin(), i2 = range.end();\r
+        int n = dst->cols;\r
+        AutoBuffer<uchar> _buf((n+2)*2*sizeof(float) + (n+2)*sizeof(int));\r
+        float* f = (float*)(uchar*)_buf;\r
+        float* z = f + n;\r
+        int* v = alignPtr((int*)(z + n + 1), sizeof(int));\r
+       \r
+        for( i = i1; i < i2; i++ )\r
+        {\r
+            float* d = (float*)(dst->data.ptr + i*dst->step);\r
+            int p, q, k;\r
+            \r
+            v[0] = 0;\r
+            z[0] = -inf;\r
+            z[1] = inf;\r
+            f[0] = d[0];\r
+            \r
+            for( q = 1, k = 0; q < n; q++ )\r
+            {\r
+                float fq = d[q];\r
+                f[q] = fq;\r
+                \r
+                for(;;k--)\r
+                {\r
+                    p = v[k];\r
+                    float s = (fq + sqr_tab[q] - d[p] - sqr_tab[p])*inv_tab[q - p];\r
+                    if( s > z[k] )\r
+                    {\r
+                        k++;\r
+                        v[k] = q;\r
+                        z[k] = s;\r
+                        z[k+1] = inf;\r
+                        break;\r
+                    }\r
+                }\r
+            }\r
+            \r
+            for( q = 0, k = 0; q < n; q++ )\r
+            {\r
+                while( z[k+1] < q )\r
+                    k++;\r
+                p = v[k];\r
+                d[q] = std::sqrt(sqr_tab[std::abs(q - p)] + f[p]);\r
+            }\r
+        }\r
+    }\r
+    \r
+    CvMat* dst;\r
+    const float* sqr_tab;\r
+    const float* inv_tab;\r
+};\r
+\r
+}\r
 \r
 static void\r
 icvTrueDistTrans( const CvMat* src, CvMat* dst )\r
 {\r
-    cv::Ptr<CvMat> buffer = 0;\r
-\r
-    int i, m, n;\r
-    int sstep, dstep;\r
     const float inf = 1e6f;\r
-    int thread_count = cvGetNumThreads();\r
-    int pass1_sz, pass2_sz;\r
-\r
+    \r
     if( !CV_ARE_SIZES_EQ( src, dst ))\r
         CV_Error( CV_StsUnmatchedSizes, "" );\r
 \r
@@ -460,24 +569,13 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
         CV_Error( CV_StsUnsupportedFormat,\r
         "The input image must have 8uC1 type and the output one must have 32fC1 type" );\r
 \r
-    m = src->rows;\r
-    n = src->cols;\r
-\r
-    // (see stage 1 below):\r
-    // sqr_tab: 2*m, sat_tab: 3*m + 1, d: m*thread_count,\r
-    pass1_sz = src->rows*(5 + thread_count) + 1;\r
-    // (see stage 2):\r
-    // sqr_tab & inv_tab: n each; f & v: n*thread_count each; z: (n+1)*thread_count\r
-    pass2_sz = src->cols*(2 + thread_count*3) + thread_count;\r
-    buffer = cvCreateMat( 1, MAX(pass1_sz, pass2_sz), CV_32FC1 );\r
-\r
-    sstep = src->step;\r
-    dstep = dst->step / sizeof(float);\r
+    int i, m = src->rows, n = src->cols;\r
 \r
+    cv::AutoBuffer<uchar> _buf(std::max(m*2*sizeof(float) + (m*3+1)*sizeof(int), n*2*sizeof(float)));\r
     // stage 1: compute 1d distance transform of each column\r
-    float* sqr_tab = buffer->data.fl;\r
-    int* sat_tab = (int*)(sqr_tab + m*2);\r
-    const int shift = m*2;\r
+    float* sqr_tab = (float*)(uchar*)_buf;\r
+    int* sat_tab = cv::alignPtr((int*)(sqr_tab + m*2), sizeof(int));\r
+    int shift = m*2;\r
 \r
     for( i = 0; i < m; i++ )\r
         sqr_tab[i] = (float)(i*i);\r
@@ -488,35 +586,11 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
     for( ; i <= m*3; i++ )\r
         sat_tab[i] = i - shift;\r
 \r
-#ifdef _OPENMP\r
-    #pragma omp parallel for num_threads(thread_count)\r
-#endif\r
-    for( i = 0; i < n; i++ )\r
-    {\r
-        const uchar* sptr = src->data.ptr + i + (m-1)*sstep;\r
-        float* dptr = dst->data.fl + i;\r
-        int* d = (int*)(sat_tab + m*3+1+m*cvGetThreadNum());\r
-        int j, dist = m-1;\r
-\r
-        for( j = m-1; j >= 0; j--, sptr -= sstep )\r
-        {\r
-            dist = (dist + 1) & (sptr[0] == 0 ? 0 : -1);\r
-            d[j] = dist;\r
-        }\r
-\r
-        dist = m-1;\r
-        for( j = 0; j < m; j++, dptr += dstep )\r
-        {\r
-            dist = dist + 1 - sat_tab[dist + 1 - d[j] + shift];\r
-            d[j] = dist;\r
-            dptr[0] = sqr_tab[dist];\r
-        }\r
-    }\r
+    cv::parallel_for(cv::BlockedRange(0, n), cv::DTColumnInvoker(src, dst, sat_tab, sqr_tab)); \r
 \r
     // stage 2: compute modified distance transform for each row\r
-    float* inv_tab = buffer->data.fl;\r
-    sqr_tab = inv_tab + n;\r
-\r
+    float* inv_tab = sqr_tab + n;\r
+    \r
     inv_tab[0] = sqr_tab[0] = 0.f;\r
     for( i = 1; i < n; i++ )\r
     {\r
@@ -524,52 +598,7 @@ icvTrueDistTrans( const CvMat* src, CvMat* dst )
         sqr_tab[i] = (float)(i*i);\r
     }\r
 \r
-#ifdef _OPENMP\r
-    #pragma omp parallel for num_threads(thread_count) schedule(dynamic)\r
-#endif\r
-    for( i = 0; i < m; i++ )\r
-    {\r
-        float* d = (float*)(dst->data.ptr + i*dst->step);\r
-        float* f = sqr_tab + n + (n*3+1)*cvGetThreadNum();\r
-        float* z = f + n;\r
-        int* v = (int*)(z + n + 1);\r
-        int p, q, k;\r
-\r
-        v[0] = 0;\r
-        z[0] = -inf;\r
-        z[1] = inf;\r
-        f[0] = d[0];\r
-\r
-        for( q = 1, k = 0; q < n; q++ )\r
-        {\r
-            float fq = d[q];\r
-            f[q] = fq;\r
-\r
-            for(;;k--)\r
-            {\r
-                p = v[k];\r
-                float s = (fq + sqr_tab[q] - d[p] - sqr_tab[p])*inv_tab[q - p];\r
-                if( s > z[k] )\r
-                {\r
-                    k++;\r
-                    v[k] = q;\r
-                    z[k] = s;\r
-                    z[k+1] = inf;\r
-                    break;\r
-                }\r
-            }\r
-        }\r
-\r
-        for( q = 0, k = 0; q < n; q++ )\r
-        {\r
-            while( z[k+1] < q )\r
-                k++;\r
-            p = v[k];\r
-            d[q] = sqr_tab[abs(q - p)] + f[p];\r
-        }\r
-    }\r
-\r
-    cvPow( dst, dst, 0.5 );\r
+    cv::parallel_for(cv::BlockedRange(0, m), cv::DTRowInvoker(dst, sqr_tab, inv_tab));\r
 }\r
 \r
 \r
index 169f1da6c011aa1cc6d35bcb6310465b52c82cf0..24346ad48b57c1cbf74fb983b4900a2361edbfa4 100644 (file)
@@ -426,7 +426,7 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
     CvHidHaarClassifierCascade* cascade;
     int coi0 = 0, coi1 = 0;
     int i;
-    CvRect equ_rect;
+    CvRect equRect;
     double weight_scale;
 
     if( !CV_IS_HAAR_CLASSIFIER(_cascade) )
@@ -478,30 +478,26 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
     cascade->sum = *sum;
     cascade->sqsum = *sqsum;
 
-    equ_rect.x = equ_rect.y = cvRound(scale);
-    equ_rect.width = cvRound((_cascade->orig_window_size.width-2)*scale);
-    equ_rect.height = cvRound((_cascade->orig_window_size.height-2)*scale);
-    weight_scale = 1./(equ_rect.width*equ_rect.height);
+    equRect.x = equRect.y = cvRound(scale);
+    equRect.width = cvRound((_cascade->orig_window_size.width-2)*scale);
+    equRect.height = cvRound((_cascade->orig_window_size.height-2)*scale);
+    weight_scale = 1./(equRect.width*equRect.height);
     cascade->inv_window_area = weight_scale;
 
-    cascade->p0 = sum_elem_ptr(*sum, equ_rect.y, equ_rect.x);
-    cascade->p1 = sum_elem_ptr(*sum, equ_rect.y, equ_rect.x + equ_rect.width );
-    cascade->p2 = sum_elem_ptr(*sum, equ_rect.y + equ_rect.height, equ_rect.x );
-    cascade->p3 = sum_elem_ptr(*sum, equ_rect.y + equ_rect.height,
-                                     equ_rect.x + equ_rect.width );
+    cascade->p0 = sum_elem_ptr(*sum, equRect.y, equRect.x);
+    cascade->p1 = sum_elem_ptr(*sum, equRect.y, equRect.x + equRect.width );
+    cascade->p2 = sum_elem_ptr(*sum, equRect.y + equRect.height, equRect.x );
+    cascade->p3 = sum_elem_ptr(*sum, equRect.y + equRect.height,
+                                     equRect.x + equRect.width );
 
-    cascade->pq0 = sqsum_elem_ptr(*sqsum, equ_rect.y, equ_rect.x);
-    cascade->pq1 = sqsum_elem_ptr(*sqsum, equ_rect.y, equ_rect.x + equ_rect.width );
-    cascade->pq2 = sqsum_elem_ptr(*sqsum, equ_rect.y + equ_rect.height, equ_rect.x );
-    cascade->pq3 = sqsum_elem_ptr(*sqsum, equ_rect.y + equ_rect.height,
-                                          equ_rect.x + equ_rect.width );
+    cascade->pq0 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x);
+    cascade->pq1 = sqsum_elem_ptr(*sqsum, equRect.y, equRect.x + equRect.width );
+    cascade->pq2 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height, equRect.x );
+    cascade->pq3 = sqsum_elem_ptr(*sqsum, equRect.y + equRect.height,
+                                          equRect.x + equRect.width );
 
     /* init pointers in haar features according to real window size and
        given image pointers */
-#ifdef _OPENMP
-    int max_threads = cvGetNumThreads();
-    #pragma omp parallel for num_threads(max_threads) schedule(dynamic)
-#endif // _OPENMP
     for( i = 0; i < _cascade->count; i++ )
     {
         int j, k, l;
@@ -517,13 +513,12 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
                     &cascade->stage_classifier[i].classifier[j].node[l].feature;
                 double sum0 = 0, area0 = 0;
                 CvRect r[3];
-#if CV_ADJUST_FEATURES
+
                 int base_w = -1, base_h = -1;
                 int new_base_w = 0, new_base_h = 0;
                 int kx, ky;
                 int flagx = 0, flagy = 0;
                 int x0 = 0, y0 = 0;
-#endif
                 int nr;
 
                 /* align blocks */
@@ -531,18 +526,15 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
                 {
                     if( !hidfeature->rect[k].p0 )
                         break;
-#if CV_ADJUST_FEATURES
                     r[k] = feature->rect[k].r;
                     base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].width-1) );
                     base_w = (int)CV_IMIN( (unsigned)base_w, (unsigned)(r[k].x - r[0].x-1) );
                     base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].height-1) );
                     base_h = (int)CV_IMIN( (unsigned)base_h, (unsigned)(r[k].y - r[0].y-1) );
-#endif
                 }
 
                 nr = k;
 
-#if CV_ADJUST_FEATURES
                 base_w += 1;
                 base_h += 1;
                 kx = r[0].width / base_w;
@@ -561,34 +553,29 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
                     new_base_h = cvRound( r[0].height * scale ) / ky;
                     y0 = cvRound( r[0].y * scale );
                 }
-#endif
 
                 for( k = 0; k < nr; k++ )
                 {
                     CvRect tr;
                     double correction_ratio;
 
-#if CV_ADJUST_FEATURES
                     if( flagx )
                     {
                         tr.x = (r[k].x - r[0].x) * new_base_w / base_w + x0;
                         tr.width = r[k].width * new_base_w / base_w;
                     }
                     else
-#endif
                     {
                         tr.x = cvRound( r[k].x * scale );
                         tr.width = cvRound( r[k].width * scale );
                     }
 
-#if CV_ADJUST_FEATURES
                     if( flagy )
                     {
                         tr.y = (r[k].y - r[0].y) * new_base_h / base_h + y0;
                         tr.height = r[k].height * new_base_h / base_h;
                     }
                     else
-#endif
                     {
                         tr.y = cvRound( r[k].y * scale );
                         tr.height = cvRound( r[k].height * scale );
@@ -600,7 +587,7 @@ cvSetImagesForHaarClassifierCascade( CvHaarClassifierCascade* _cascade,
                     const float orig_feature_size =  (float)(feature->rect[k].r.width)*feature->rect[k].r.height;
                     const float orig_norm_size = (float)(_cascade->orig_window_size.width)*(_cascade->orig_window_size.height);
                     const float feature_size = float(tr.width*tr.height);
-                    //const float normSize    = float(equ_rect.width*equ_rect.height);
+                    //const float normSize    = float(equRect.width*equRect.height);
                     float target_ratio = orig_feature_size / orig_norm_size;
                     //float isRatio = featureSize / normSize;
                     //correctionRatio = targetRatio / isRatio / normSize;
@@ -827,46 +814,193 @@ cvRunHaarClassifierCascade( const CvHaarClassifierCascade* _cascade,
 }
 
 
-static int is_equal( const void* _r1, const void* _r2, void* )
+namespace cv
 {
-    const CvRect* r1 = (const CvRect*)_r1;
-    const CvRect* r2 = (const CvRect*)_r2;
-    int distance = cvRound(r1->width*0.2);
-
-    return r2->x <= r1->x + distance &&
-           r2->x >= r1->x - distance &&
-           r2->y <= r1->y + distance &&
-           r2->y >= r1->y - distance &&
-           r2->width <= cvRound( r1->width * 1.2 ) &&
-           cvRound( r2->width * 1.2 ) >= r1->width;
-}
 
+struct HaarDetectObjects_ScaleImage_Invoker
+{
+    HaarDetectObjects_ScaleImage_Invoker( const CvHaarClassifierCascade* _cascade,
+                                          int _stripSize, double _factor,
+                                          const Mat& _sum1, const Mat& _sqsum1, Mat& _norm1,
+                                          Mat& _mask1, Rect _equRect, ConcurrentRectVector& _vec )
+    {
+        cascade = _cascade;
+        stripSize = _stripSize;
+        factor = _factor;
+        sum1 = _sum1;
+        sqsum1 = _sqsum1;
+        norm1 = _norm1;
+        mask1 = _mask1;
+        equRect = _equRect;
+        vec = &_vec;
+    }
+    
+    void operator()( const BlockedRange& range ) const
+    {
+        Size winSize0 = cascade->orig_window_size;
+        Size winSize(cvRound(winSize0.width*factor), cvRound(winSize0.height*factor));
+        int y1 = range.begin()*stripSize, y2 = min(range.end()*stripSize, sum1.rows - 1 - winSize0.height);
+        Size ssz(sum1.cols - 1 - winSize0.width, y2 - y1);
+        int x, y, ystep = factor > 2 ? 1 : 2;
+        
+    #ifdef HAVE_IPP
+        if( cascade->hid_cascade->ipp_stages )
+        {
+            ippiRectStdDev_32f_C1R(sum1.ptr<float>(y1), sum1.step,
+                                   sqsum1.ptr<double>(y1), sqsum1.step,
+                                   norm1.ptr<float>(y1), norm1.step,
+                                   ippiSize(ssz.width, ssz.height), equRect );
+            
+            int positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep);
+            
+            if( ystep == 1 )
+                mask1 = Scalar::all(1);
+            else
+                for( y = y1; y < y2; y++ )
+                {
+                    uchar* mask1row = mask1.ptr(y);
+                    memset( mask1row, 0, ssz.width );
+                    
+                    if( y % ystep == 0 )
+                        for( x = 0; x < ssz.width; x += ystep )
+                            mask1row[x] = (uchar)1;
+                }
+            
+            for( int j = 0; j < cascade->count; j++ )
+            {
+                if( ippiApplyHaarClassifier_32f_C1R(
+                            sum1.ptr<float>(y1), sum1.step,
+                            norm1.ptr<float>(y1), norm1.step,
+                            mask1.ptr<uchar>(y1), mask1.step,
+                            ippiSize(ssz.width, ssz.height), &positive,
+                            cascade->hid_cascade->stage_classifier[j].threshold,
+                            (IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 )
+                    positive = 0;
+                if( positive <= 0 )
+                    break;
+            }
+            
+            if( positive > 0 )
+                for( y = y1; y < y2; y += ystep )
+                {
+                    uchar* mask1row = mask1.row(y);
+                    for( x = 0; x < ssz.width; x += ystep )
+                        if( mask1row[x] != 0 )
+                        {
+                            vec->push_back(Rect(cvRound(x*factor), cvRound(y*factor),
+                                                winSize.width, winSize.height));
+                            if( --positive == 0 )
+                                break;
+                        }
+                    if( positive == 0 )
+                        break;
+                }
+        }
+        else
+#endif
+            for( y = y1; y < y2; y += ystep )
+                for( x = 0; x < ssz.width; x += ystep )
+                {
+                    if( cvRunHaarClassifierCascade( cascade, cvPoint(x,y), 0 ) > 0 )
+                        vec->push_back(Rect(cvRound(x*factor), cvRound(y*factor),
+                                            winSize.width, winSize.height)); 
+                }
+    }
+    
+    const CvHaarClassifierCascade* cascade;
+    int stripSize;
+    double factor;
+    Mat sum1, sqsum1, norm1, mask1;
+    Rect equRect;
+    ConcurrentRectVector* vec;
+};
+    
 
-#define VERY_ROUGH_SEARCH 0
+struct HaarDetectObjects_ScaleCascade_Invoker
+{
+    HaarDetectObjects_ScaleCascade_Invoker( const CvHaarClassifierCascade* _cascade,
+                                            Size _winsize, const Range& _xrange, double _ystep,
+                                            size_t _sumstep, const int** _p, const int** _pq,
+                                            ConcurrentRectVector& _vec )
+    {
+        cascade = _cascade;
+        winsize = _winsize;
+        xrange = _xrange;
+        ystep = _ystep;
+        sumstep = _sumstep;
+        p = _p; pq = _pq;
+        vec = &_vec;
+    }
+    
+    void operator()( const BlockedRange& range ) const
+    {
+        int iy, startY = range.begin(), endY = range.end();
+        const int *p0 = p[0], *p1 = p[1], *p2 = p[2], *p3 = p[3];
+        const int *pq0 = pq[0], *pq1 = pq[1], *pq2 = pq[2], *pq3 = pq[3];
+        bool doCannyPruning = p0 != 0;
+        int sstep = sumstep/sizeof(p0[0]);
+        
+        for( iy = startY; iy < endY; iy++ )
+        {
+            int ix, y = cvRound(iy*ystep), ixstep = 1;
+            for( ix = xrange.start; ix < xrange.end; ix += ixstep )
+            {
+                int x = cvRound(ix*ystep); // it should really be ystep, not ixstep
+                
+                if( doCannyPruning )
+                {
+                    int offset = y*sstep + x;
+                    int s = p0[offset] - p1[offset] - p2[offset] + p3[offset];
+                    int sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset];
+                    if( s < 100 || sq < 20 )
+                    {
+                        ixstep = 2;
+                        continue;
+                    }
+                }
+                
+                int result = cvRunHaarClassifierCascade( cascade, cvPoint(x, y), 0 );
+                if( result > 0 )
+                    vec->push_back(Rect(x, y, winsize.width, winsize.height));
+                ixstep = result != 0 ? 1 : 2;
+            }
+        }
+    }
+    
+    const CvHaarClassifierCascade* cascade;
+    double ystep;
+    size_t sumstep;
+    Size winsize;
+    Range xrange;
+    const int** p;
+    const int** pq;
+    ConcurrentRectVector* vec;
+};
+    
+    
+}
+    
 
 CV_IMPL CvSeq*
 cvHaarDetectObjects( const CvArr* _img,
                      CvHaarClassifierCascade* cascade,
-                     CvMemStorage* storage, double scale_factor,
-                     int min_neighbors, int flags, CvSize min_size )
+                     CvMemStorage* storage, double scaleFactor,
+                     int minNeighbors, int flags, CvSize minSize )
 {
-    int split_stage = 2;
-
+    const double GROUP_EPS = 0.2;
     CvMat stub, *img = (CvMat*)_img;
-    cv::Ptr<CvMat> temp, sum, tilted, sqsum, norm_img, sumcanny, img_small;
+    cv::Ptr<CvMat> temp, sum, tilted, sqsum, normImg, sumcanny, imgSmall;
     CvSeq* result_seq = 0;
     cv::Ptr<CvMemStorage> temp_storage;
-    cv::AutoBuffer<CvAvgComp> comps;
-    CvSeq* seq_thread[CV_MAX_THREADS] = {0};
-    int i, max_threads = 0;
 
-    CvSeq *seq = 0, *seq2 = 0, *idx_seq = 0, *big_seq = 0;
-    CvAvgComp result_comp = {{0,0,0,0},0};
+    cv::ConcurrentRectVector allCandidates;
+    std::vector<cv::Rect> rectList;
+    std::vector<int> rweights;
     double factor;
-    int npass = 2, coi;
-    bool do_canny_pruning = (flags & CV_HAAR_DO_CANNY_PRUNING) != 0;
-    bool find_biggest_object = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
-    bool rough_search = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
+    int coi;
+    bool doCannyPruning = (flags & CV_HAAR_DO_CANNY_PRUNING) != 0;
+    bool findBiggestObject = (flags & CV_HAAR_FIND_BIGGEST_OBJECT) != 0;
+    bool roughSearch = (flags & CV_HAAR_DO_ROUGH_SEARCH) != 0;
 
     if( !CV_IS_HAAR_CLASSIFIER(cascade) )
         CV_Error( !cascade ? CV_StsNullPtr : CV_StsBadArg, "Invalid classifier cascade" );
@@ -881,16 +1015,15 @@ cvHaarDetectObjects( const CvArr* _img,
     if( CV_MAT_DEPTH(img->type) != CV_8U )
         CV_Error( CV_StsUnsupportedFormat, "Only 8-bit images are supported" );
     
-    if( scale_factor <= 1 )
+    if( scaleFactor <= 1 )
         CV_Error( CV_StsOutOfRange, "scale factor must be > 1" );
 
-    if( find_biggest_object )
+    if( findBiggestObject )
         flags &= ~CV_HAAR_SCALE_IMAGE;
 
     temp = cvCreateMat( img->rows, img->cols, CV_8UC1 );
     sum = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
     sqsum = cvCreateMat( img->rows + 1, img->cols + 1, CV_64FC1 );
-    temp_storage = cvCreateChildMemStorage( storage );
 
     if( !cascade->hid_cascade )
         icvCreateHidHaarClassifierCascade(cascade);
@@ -898,64 +1031,48 @@ cvHaarDetectObjects( const CvArr* _img,
     if( cascade->hid_cascade->has_tilted_features )
         tilted = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
 
-    seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvRect), temp_storage );
-    seq2 = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), temp_storage );
     result_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvAvgComp), storage );
 
-    max_threads = cvGetNumThreads();
-    if( max_threads > 1 )
-        for( i = 0; i < max_threads; i++ )
-        {
-            CvMemStorage* temp_storage_thread;
-            temp_storage_thread = cvCreateMemStorage(0);
-            seq_thread[i] = cvCreateSeq( 0, sizeof(CvSeq),
-                sizeof(CvRect), temp_storage_thread );
-        }
-    else
-        seq_thread[0] = seq;
-
     if( CV_MAT_CN(img->type) > 1 )
     {
         cvCvtColor( img, temp, CV_BGR2GRAY );
         img = temp;
     }
 
-    if( flags & CV_HAAR_FIND_BIGGEST_OBJECT )
+    if( findBiggestObject )
         flags &= ~(CV_HAAR_SCALE_IMAGE|CV_HAAR_DO_CANNY_PRUNING);
 
     if( flags & CV_HAAR_SCALE_IMAGE )
     {
-        CvSize win_size0 = cascade->orig_window_size;
+        CvSize winSize0 = cascade->orig_window_size;
 #ifdef HAVE_IPP
         int use_ipp = cascade->hid_cascade->ipp_stages != 0;
 
         if( use_ipp )
-            norm_img = cvCreateMat( img->rows, img->cols, CV_32FC1 );
+            normImg = cvCreateMat( img->rows, img->cols, CV_32FC1 );
 #endif
-        img_small = cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 );
+        imgSmall = cvCreateMat( img->rows + 1, img->cols + 1, CV_8UC1 );
 
-        for( factor = 1; ; factor *= scale_factor )
+        for( factor = 1; ; factor *= scaleFactor )
         {
-            int strip_count, strip_size;
-            int ystep = factor > 2. ? 1 : 2;
-            CvSize win_size = { cvRound(win_size0.width*factor),
-                                cvRound(win_size0.height*factor) };
+            CvSize winSize = { cvRound(winSize0.width*factor),
+                                cvRound(winSize0.height*factor) };
             CvSize sz = { cvRound( img->cols/factor ), cvRound( img->rows/factor ) };
-            CvSize sz1 = { sz.width - win_size0.width, sz.height - win_size0.height };
-#ifdef HAVE_IPP
-            IppiRect equ_rect = { icv_object_win_border, icv_object_win_border,
-                win_size0.width - icv_object_win_border*2,
-                win_size0.height - icv_object_win_border*2 };
-#endif
+            CvSize sz1 = { sz.width - winSize0.width, sz.height - winSize0.height };
+
+            CvRect equRect = { icv_object_win_border, icv_object_win_border,
+                winSize0.width - icv_object_win_border*2,
+                winSize0.height - icv_object_win_border*2 };
+
             CvMat img1, sum1, sqsum1, norm1, tilted1, mask1;
             CvMat* _tilted = 0;
 
             if( sz1.width <= 0 || sz1.height <= 0 )
                 break;
-            if( win_size.width < min_size.width || win_size.height < min_size.height )
+            if( winSize.width < minSize.width || winSize.height < minSize.height )
                 continue;
 
-            img1 = cvMat( sz.height, sz.width, CV_8UC1, img_small->data.ptr );
+            img1 = cvMat( sz.height, sz.width, CV_8UC1, imgSmall->data.ptr );
             sum1 = cvMat( sz.height+1, sz.width+1, CV_32SC1, sum->data.ptr );
             sqsum1 = cvMat( sz.height+1, sz.width+1, CV_64FC1, sqsum->data.ptr );
             if( tilted )
@@ -963,181 +1080,82 @@ cvHaarDetectObjects( const CvArr* _img,
                 tilted1 = cvMat( sz.height+1, sz.width+1, CV_32SC1, tilted->data.ptr );
                 _tilted = &tilted1;
             }
-            norm1 = cvMat( sz1.height, sz1.width, CV_32FC1, norm_img ? norm_img->data.ptr : 0 );
+            norm1 = cvMat( sz1.height, sz1.width, CV_32FC1, normImg ? normImg->data.ptr : 0 );
             mask1 = cvMat( sz1.height, sz1.width, CV_8UC1, temp->data.ptr );
 
             cvResize( img, &img1, CV_INTER_LINEAR );
             cvIntegral( &img1, &sum1, &sqsum1, _tilted );
 
-            if( max_threads > 1 )
-            {
-                strip_count = MAX(MIN(sz1.height/ystep, max_threads*3), 1);
-                strip_size = (sz1.height + strip_count - 1)/strip_count;
-                strip_size = (strip_size / ystep)*ystep;
-            }
-            else
-            {
-                strip_count = 1;
-                strip_size = sz1.height;
-            }
-
+            int ystep = factor > 2 ? 1 : 2;
+        #ifdef HAVE_TBB
+            const int LOCS_PER_THREAD = 1000;
+            int stripCount = ((sz1.width/ystep)*(sz1.height + ystep-1)/ystep + LOCS_PER_THREAD/2)/LOCS_PER_THREAD;
+            stripCount = std::min(std::max(stripCount, 1), 100);
+        #else
+            const int stripCount = 1;
+        #endif
+            
 #ifdef HAVE_IPP
             if( use_ipp )
             {
-                for( i = 0; i <= sz.height; i++ )
-                {
-                    const int* isum = (int*)(sum1.data.ptr + sum1.step*i);
-                    float* fsum = (float*)isum;
-                    const int FLT_DELTA = -(1 << 24);
-                    int j;
-                    for( j = 0; j <= sz.width; j++ )
-                        fsum[j] = (float)(isum[j] + FLT_DELTA);
-                }
+                cv::Mat fsum(sum1.rows, sum1.cols, CV_32F, sum1.data.ptr, sum1.step);
+                cv::Mat(sum1).convertTo(fsum, CV_32F, 1, -(1<<24));
             }
             else
 #endif
-                cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. );
-
-        #ifdef _OPENMP
-            #pragma omp parallel for num_threads(max_threads) schedule(dynamic)
-        #endif
-            for( i = 0; i < strip_count; i++ )
-            {
-                int thread_id = cvGetThreadNum();
-                int positive = 0;
-                int y1 = i*strip_size, y2 = (i+1)*strip_size/* - ystep + 1*/;
-                CvSize ssz;
-                int x, y;
-                if( i == strip_count - 1 || y2 > sz1.height )
-                    y2 = sz1.height;
-                ssz = cvSize(sz1.width, y2 - y1);
-
-#ifdef HAVE_IPP
-                if( use_ipp )
-                {
-                    ippiRectStdDev_32f_C1R(
-                        (float*)(sum1.data.ptr + y1*sum1.step), sum1.step,
-                        (double*)(sqsum1.data.ptr + y1*sqsum1.step), sqsum1.step,
-                        (float*)(norm1.data.ptr + y1*norm1.step), norm1.step,
-                        ippiSize(ssz.width, ssz.height), equ_rect );
-
-                    positive = (ssz.width/ystep)*((ssz.height + ystep-1)/ystep);
-                    memset( mask1.data.ptr + y1*mask1.step, ystep == 1, mask1.height*mask1.step);
-                    
-                    if( ystep > 1 )
-                    {
-                        for( y = y1, positive = 0; y < y2; y += ystep )
-                            for( x = 0; x < ssz.width; x += ystep )
-                                mask1.data.ptr[mask1.step*y + x] = (uchar)1;
-                    }
-
-                    for( int j = 0; j < cascade->count; j++ )
-                    {
-                        if( ippiApplyHaarClassifier_32f_C1R(
-                            (float*)(sum1.data.ptr + y1*sum1.step), sum1.step,
-                            (float*)(norm1.data.ptr + y1*norm1.step), norm1.step,
-                            mask1.data.ptr + y1*mask1.step, mask1.step,
-                            ippiSize(ssz.width, ssz.height), &positive,
-                            cascade->hid_cascade->stage_classifier[j].threshold,
-                            (IppiHaarClassifier_32f*)cascade->hid_cascade->ipp_stages[j]) < 0 )
-                        {
-                            positive = 0;
-                            break;
-                        }
-                        if( positive <= 0 )
-                            break;
-                    }
-                }
-                else
-#endif
-                {
-                    for( y = y1, positive = 0; y < y2; y += ystep )
-                        for( x = 0; x < ssz.width; x += ystep )
-                        {
-                            mask1.data.ptr[mask1.step*y + x] =
-                                cvRunHaarClassifierCascade( cascade, cvPoint(x,y), 0 ) > 0;
-                            positive += mask1.data.ptr[mask1.step*y + x];
-                        }
-                }
-
-                if( positive > 0 )
-                {
-                    for( y = y1; y < y2; y += ystep )
-                        for( x = 0; x < ssz.width; x += ystep )
-                            if( mask1.data.ptr[mask1.step*y + x] != 0 )
-                            {
-                                CvRect obj_rect = { cvRound(x*factor), cvRound(y*factor),
-                                                    win_size.width, win_size.height };
-                                cvSeqPush( seq_thread[thread_id], &obj_rect );
-                            }
-                }
-            }
-
-            // gather the results
-            if( max_threads > 1 )
-                for( i = 0; i < max_threads; i++ )
-                {
-                    CvSeq* s = seq_thread[i];
-                    int j, total = s->total;
-                    CvSeqBlock* b = s->first;
-                    for( j = 0; j < total; j += b->count, b = b->next )
-                        cvSeqPushMulti( seq, b->data, b->count );
-                }
+                cvSetImagesForHaarClassifierCascade( cascade, &sum1, &sqsum1, _tilted, 1. );            
+            
+            cv::Mat _norm1(&norm1), _mask1(&mask1);
+            cv::parallel_for(cv::BlockedRange(0, stripCount),
+                         cv::HaarDetectObjects_ScaleImage_Invoker(cascade,
+                                (((sz1.height + stripCount - 1)/stripCount + ystep-1)/ystep)*ystep,
+                                factor, cv::Mat(&sum1), cv::Mat(&sqsum1), _norm1, _mask1,
+                                cv::Rect(equRect), allCandidates));
         }
     }
     else
     {
         int n_factors = 0;
-        CvRect scan_roi_rect = {0,0,0,0};
-        bool is_found = false, scan_roi = false;
+        cv::Rect scanROI;
 
         cvIntegral( img, sum, sqsum, tilted );
 
-        if( do_canny_pruning )
+        if( doCannyPruning )
         {
             sumcanny = cvCreateMat( img->rows + 1, img->cols + 1, CV_32SC1 );
             cvCanny( img, temp, 0, 50, 3 );
             cvIntegral( temp, sumcanny );
         }
 
-        if( (unsigned)split_stage >= (unsigned)cascade->count ||
-            cascade->hid_cascade->is_tree )
-        {
-            split_stage = cascade->count;
-            npass = 1;
-        }
-
         for( n_factors = 0, factor = 1;
              factor*cascade->orig_window_size.width < img->cols - 10 &&
              factor*cascade->orig_window_size.height < img->rows - 10;
-             n_factors++, factor *= scale_factor )
+             n_factors++, factor *= scaleFactor )
             ;
 
-        if( find_biggest_object )
+        if( findBiggestObject )
         {
-            scale_factor = 1./scale_factor;
-            factor *= scale_factor;
-            big_seq = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvRect), temp_storage );
+            scaleFactor = 1./scaleFactor;
+            factor *= scaleFactor;
         }
         else
             factor = 1;
 
-        for( ; n_factors-- > 0 && !is_found; factor *= scale_factor )
+        for( ; n_factors-- > 0; factor *= scaleFactor )
         {
-            const double ystep = MAX( 2, factor );
-            CvSize win_size = { cvRound( cascade->orig_window_size.width * factor ),
+            const double ystep = std::max( 2., factor );
+            CvSize winSize = { cvRound( cascade->orig_window_size.width * factor ),
                                 cvRound( cascade->orig_window_size.height * factor )};
-            CvRect equ_rect = { 0, 0, 0, 0 };
-            int *p0 = 0, *p1 = 0, *p2 = 0, *p3 = 0;
-            int *pq0 = 0, *pq1 = 0, *pq2 = 0, *pq3 = 0;
-            int pass, stage_offset = 0;
-            int start_x = 0, start_y = 0;
-            int end_x = cvRound((img->cols - win_size.width) / ystep);
-            int end_y = cvRound((img->rows - win_size.height) / ystep);
-
-            if( win_size.width < min_size.width || win_size.height < min_size.height )
+            CvRect equRect = { 0, 0, 0, 0 };
+            int *p[4] = {0,0,0,0};
+            int *pq[4] = {0,0,0,0};
+            int startX = 0, startY = 0;
+            int endX = cvRound((img->cols - winSize.width) / ystep);
+            int endY = cvRound((img->rows - winSize.height) / ystep);
+
+            if( winSize.width < minSize.width || winSize.height < minSize.height )
             {
-                if( find_biggest_object )
+                if( findBiggestObject )
                     break;
                 continue;
             }
@@ -1145,377 +1163,112 @@ cvHaarDetectObjects( const CvArr* _img,
             cvSetImagesForHaarClassifierCascade( cascade, sum, sqsum, tilted, factor );
             cvZero( temp );
 
-            if( do_canny_pruning )
+            if( doCannyPruning )
             {
-                equ_rect.x = cvRound(win_size.width*0.15);
-                equ_rect.y = cvRound(win_size.height*0.15);
-                equ_rect.width = cvRound(win_size.width*0.7);
-                equ_rect.height = cvRound(win_size.height*0.7);
-
-                p0 = (int*)(sumcanny->data.ptr + equ_rect.y*sumcanny->step) + equ_rect.x;
-                p1 = (int*)(sumcanny->data.ptr + equ_rect.y*sumcanny->step)
-                            + equ_rect.x + equ_rect.width;
-                p2 = (int*)(sumcanny->data.ptr + (equ_rect.y + equ_rect.height)*sumcanny->step) + equ_rect.x;
-                p3 = (int*)(sumcanny->data.ptr + (equ_rect.y + equ_rect.height)*sumcanny->step)
-                            + equ_rect.x + equ_rect.width;
-
-                pq0 = (int*)(sum->data.ptr + equ_rect.y*sum->step) + equ_rect.x;
-                pq1 = (int*)(sum->data.ptr + equ_rect.y*sum->step)
-                            + equ_rect.x + equ_rect.width;
-                pq2 = (int*)(sum->data.ptr + (equ_rect.y + equ_rect.height)*sum->step) + equ_rect.x;
-                pq3 = (int*)(sum->data.ptr + (equ_rect.y + equ_rect.height)*sum->step)
-                            + equ_rect.x + equ_rect.width;
+                equRect.x = cvRound(winSize.width*0.15);
+                equRect.y = cvRound(winSize.height*0.15);
+                equRect.width = cvRound(winSize.width*0.7);
+                equRect.height = cvRound(winSize.height*0.7);
+
+                p[0] = (int*)(sumcanny->data.ptr + equRect.y*sumcanny->step) + equRect.x;
+                p[1] = (int*)(sumcanny->data.ptr + equRect.y*sumcanny->step)
+                            + equRect.x + equRect.width;
+                p[2] = (int*)(sumcanny->data.ptr + (equRect.y + equRect.height)*sumcanny->step) + equRect.x;
+                p[3] = (int*)(sumcanny->data.ptr + (equRect.y + equRect.height)*sumcanny->step)
+                            + equRect.x + equRect.width;
+
+                pq[0] = (int*)(sum->data.ptr + equRect.y*sum->step) + equRect.x;
+                pq[1] = (int*)(sum->data.ptr + equRect.y*sum->step)
+                            + equRect.x + equRect.width;
+                pq[2] = (int*)(sum->data.ptr + (equRect.y + equRect.height)*sum->step) + equRect.x;
+                pq[3] = (int*)(sum->data.ptr + (equRect.y + equRect.height)*sum->step)
+                            + equRect.x + equRect.width;
             }
 
-            if( scan_roi )
+            if( scanROI.area() > 0 )
             {
                 //adjust start_height and stop_height
-                start_y = cvRound(scan_roi_rect.y / ystep);
-                end_y = cvRound((scan_roi_rect.y + scan_roi_rect.height - win_size.height) / ystep);
+                startY = cvRound(scanROI.y / ystep);
+                endY = cvRound((scanROI.y + scanROI.height - winSize.height) / ystep);
 
-                start_x = cvRound(scan_roi_rect.x / ystep);
-                end_x = cvRound((scan_roi_rect.x + scan_roi_rect.width - win_size.width) / ystep);
+                startX = cvRound(scanROI.x / ystep);
+                endX = cvRound((scanROI.x + scanROI.width - winSize.width) / ystep);
             }
 
-            cascade->hid_cascade->count = split_stage;
-
-            for( pass = 0; pass < npass; pass++ )
-            {
-            #ifdef _OPENMP
-                #pragma omp parallel for num_threads(max_threads) schedule(dynamic)
-            #endif
-                for( int _iy = start_y; _iy < end_y; _iy++ )
-                {
-                    int thread_id = cvGetThreadNum();
-                    int iy = cvRound(_iy*ystep);
-                    int _ix, _xstep = 1;
-                    uchar* mask_row = temp->data.ptr + temp->step * iy;
-
-                    for( _ix = start_x; _ix < end_x; _ix += _xstep )
-                    {
-                        int ix = cvRound(_ix*ystep); // it really should be ystep
-
-                        if( pass == 0 )
-                        {
-                            int result;
-                            _xstep = 2;
-
-                            if( do_canny_pruning )
-                            {
-                                int offset;
-                                int s, sq;
-
-                                offset = iy*(sum->step/sizeof(p0[0])) + ix;
-                                s = p0[offset] - p1[offset] - p2[offset] + p3[offset];
-                                sq = pq0[offset] - pq1[offset] - pq2[offset] + pq3[offset];
-                                if( s < 100 || sq < 20 )
-                                    continue;
-                            }
-
-                            result = cvRunHaarClassifierCascade( cascade, cvPoint(ix,iy), 0 );
-                            if( result > 0 )
-                            {
-                                if( pass < npass - 1 )
-                                    mask_row[ix] = 1;
-                                else
-                                {
-                                    CvRect rect = cvRect(ix,iy,win_size.width,win_size.height);
-                                    cvSeqPush( seq_thread[thread_id], &rect );
-                                }
-                            }
-                            if( result < 0 )
-                                _xstep = 1;
-                        }
-                        else if( mask_row[ix] )
-                        {
-                            int result = cvRunHaarClassifierCascade( cascade, cvPoint(ix,iy),
-                                                                     stage_offset );
-                            if( result > 0 )
-                            {
-                                if( pass == npass - 1 )
-                                {
-                                    CvRect rect = cvRect(ix,iy,win_size.width,win_size.height);
-                                    cvSeqPush( seq_thread[thread_id], &rect );
-                                }
-                            }
-                            else
-                                mask_row[ix] = 0;
-                        }
-                    }
-                }
-                stage_offset = cascade->hid_cascade->count;
-                cascade->hid_cascade->count = cascade->count;
-            }
+            cv::parallel_for(cv::BlockedRange(startY, endY),
+                cv::HaarDetectObjects_ScaleCascade_Invoker(cascade, winSize, cv::Range(startX, endX),
+                                                           ystep, sum->step, (const int**)p,
+                                                           (const int**)pq, allCandidates ));
 
-            // gather the results
-            if( max_threads > 1 )
-                   for( i = 0; i < max_threads; i++ )
-                   {
-                           CvSeq* s = seq_thread[i];
-                    int j, total = s->total;
-                    CvSeqBlock* b = s->first;
-                    for( j = 0; j < total; j += b->count, b = b->next )
-                        cvSeqPushMulti( seq, b->data, b->count );
-                   }
-
-            if( find_biggest_object )
+            if( findBiggestObject && !allCandidates.empty() && scanROI.area() == 0 )
             {
-                CvSeq* bseq = min_neighbors > 0 ? big_seq : seq;
+                rectList.resize(allCandidates.size());
+                std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
                 
-                if( min_neighbors > 0 && !scan_roi )
-                {
-                    // group retrieved rectangles in order to filter out noise
-                    int ncomp = cvSeqPartition( seq, 0, &idx_seq, is_equal, 0 );
-                    comps.allocate( (ncomp+1)*sizeof(comps[0]));
-                    memset( comps, 0, (ncomp+1)*sizeof(comps[0]));
-
-                #if VERY_ROUGH_SEARCH
-                    if( rough_search )
-                    {
-                        for( i = 0; i < seq->total; i++ )
-                        {
-                            CvRect r1 = *(CvRect*)cvGetSeqElem( seq, i );
-                            int idx = *(int*)cvGetSeqElem( idx_seq, i );
-                            assert( (unsigned)idx < (unsigned)ncomp );
-
-                            comps[idx].neighbors++;
-                            comps[idx].rect.x += r1.x;
-                            comps[idx].rect.y += r1.y;
-                            comps[idx].rect.width += r1.width;
-                            comps[idx].rect.height += r1.height;
-                        }
-
-                        // calculate average bounding box
-                        for( i = 0; i < ncomp; i++ )
-                        {
-                            int n = comps[i].neighbors;
-                            if( n >= min_neighbors )
-                            {
-                                CvAvgComp comp;
-                                comp.rect.x = (comps[i].rect.x*2 + n)/(2*n);
-                                comp.rect.y = (comps[i].rect.y*2 + n)/(2*n);
-                                comp.rect.width = (comps[i].rect.width*2 + n)/(2*n);
-                                comp.rect.height = (comps[i].rect.height*2 + n)/(2*n);
-                                comp.neighbors = n;
-                                cvSeqPush( bseq, &comp );
-                            }
-                        }
-                    }
-                    else
-                #endif
-                    {
-                        for( i = 0 ; i <= ncomp; i++ )
-                            comps[i].rect.x = comps[i].rect.y = INT_MAX;
-
-                        // count number of neighbors
-                        for( i = 0; i < seq->total; i++ )
-                        {
-                            CvRect r1 = *(CvRect*)cvGetSeqElem( seq, i );
-                            int idx = *(int*)cvGetSeqElem( idx_seq, i );
-                            assert( (unsigned)idx < (unsigned)ncomp );
-
-                            comps[idx].neighbors++;
-
-                            // rect.width and rect.height will store coordinate of right-bottom corner
-                            comps[idx].rect.x = MIN(comps[idx].rect.x, r1.x);
-                            comps[idx].rect.y = MIN(comps[idx].rect.y, r1.y);
-                            comps[idx].rect.width = MAX(comps[idx].rect.width, r1.x+r1.width-1);
-                            comps[idx].rect.height = MAX(comps[idx].rect.height, r1.y+r1.height-1);
-                        }
-
-                        // calculate enclosing box
-                        for( i = 0; i < ncomp; i++ )
-                        {
-                            int n = comps[i].neighbors;
-                            if( n >= min_neighbors )
-                            {
-                                CvAvgComp comp;
-                                int t;
-                                double min_scale = rough_search ? 0.6 : 0.4;
-                                comp.rect.x = comps[i].rect.x;
-                                comp.rect.y = comps[i].rect.y;
-                                comp.rect.width = comps[i].rect.width - comps[i].rect.x + 1;
-                                comp.rect.height = comps[i].rect.height - comps[i].rect.y + 1;
-
-                                // update min_size
-                                t = cvRound( comp.rect.width*min_scale );
-                                min_size.width = MAX( min_size.width, t );
-
-                                t = cvRound( comp.rect.height*min_scale );
-                                min_size.height = MAX( min_size.height, t );
-
-                                //expand the box by 20% because we could miss some neighbours
-                                //see 'is_equal' function
-                            #if 1
-                                int offset = cvRound(comp.rect.width * 0.2);
-                                int right = MIN( img->cols-1, comp.rect.x+comp.rect.width-1 + offset );
-                                int bottom = MIN( img->rows-1, comp.rect.y+comp.rect.height-1 + offset);
-                                comp.rect.x = MAX( comp.rect.x - offset, 0 );
-                                comp.rect.y = MAX( comp.rect.y - offset, 0 );
-                                comp.rect.width = right - comp.rect.x + 1;
-                                comp.rect.height = bottom - comp.rect.y + 1;
-                            #endif
-
-                                comp.neighbors = n;
-                                cvSeqPush( bseq, &comp );
-                            }
-                        }
-                    }
-
-                    cvFree( &comps );
-                }
-
-                // extract the biggest rect
-                if( bseq->total > 0 )
+                groupRectangles(rectList, std::max(minNeighbors, 1), GROUP_EPS);
+                
+                if( !rectList.empty() )
                 {
-                    int max_area = 0;
-                    for( i = 0; i < bseq->total; i++ )
-                    {
-                        CvAvgComp* comp = (CvAvgComp*)cvGetSeqElem( bseq, i );
-                        int area = comp->rect.width * comp->rect.height;
-                        if( max_area < area )
-                        {
-                            max_area = area;
-                            result_comp.rect = comp->rect;
-                            result_comp.neighbors = bseq == seq ? 1 : comp->neighbors;
-                        }
-                    }
-
-                    //Prepare information for further scanning inside the biggest rectangle
-
-                #if VERY_ROUGH_SEARCH
-                    // change scan ranges to roi in case of required
-                    if( !rough_search && !scan_roi )
-                    {
-                        scan_roi = true;
-                        scan_roi_rect = result_comp.rect;
-                        cvClearSeq(bseq);
-                    }
-                    else if( rough_search )
-                        is_found = true;
-                #else
-                    if( !scan_roi )
+                    size_t i, sz = rectList.size();
+                    cv::Rect maxRect;
+                    
+                    for( i = 0; i < sz; i++ )
                     {
-                        scan_roi = true;
-                        scan_roi_rect = result_comp.rect;
-                        cvClearSeq(bseq);
+                        if( rectList[i].area() > maxRect.area() )
+                            maxRect = rectList[i];
                     }
-                #endif
+                    
+                    allCandidates.push_back(maxRect);
+                    
+                    scanROI = maxRect;
+                    int dx = cvRound(maxRect.width*GROUP_EPS);
+                    int dy = cvRound(maxRect.height*GROUP_EPS);
+                    scanROI.x = std::max(scanROI.x - dx, 0);
+                    scanROI.y = std::max(scanROI.y - dy, 0);
+                    scanROI.width = std::min(scanROI.width + dx*2, img->cols-1-scanROI.x);
+                    scanROI.height = std::min(scanROI.height + dy*2, img->rows-1-scanROI.y);
+                
+                    double minScale = roughSearch ? 0.6 : 0.4;
+                    minSize.width = cvRound(maxRect.width*minScale);
+                    minSize.height = cvRound(maxRect.height*minScale);
                 }
             }
         }
     }
 
-    if( min_neighbors == 0 && !find_biggest_object )
-    {
-        for( i = 0; i < seq->total; i++ )
-        {
-            CvRect* rect = (CvRect*)cvGetSeqElem( seq, i );
-            CvAvgComp comp;
-            comp.rect = *rect;
-            comp.neighbors = 1;
-            cvSeqPush( result_seq, &comp );
-        }
-    }
-
-    if( min_neighbors != 0
-#if VERY_ROUGH_SEARCH        
-        && (!find_biggest_object || !rough_search)
-#endif        
-        )
+    rectList.resize(allCandidates.size());
+    if(!allCandidates.empty())
+        std::copy(allCandidates.begin(), allCandidates.end(), rectList.begin());
+    
+    if( minNeighbors != 0 || findBiggestObject )
+        groupRectangles(rectList, rweights, std::max(minNeighbors, 1), GROUP_EPS);
+        
+    if( findBiggestObject && rectList.size() )
     {
-        // group retrieved rectangles in order to filter out noise
-        int ncomp = cvSeqPartition( seq, 0, &idx_seq, is_equal, 0 );
-        comps.allocate(ncomp+1);
-        memset( &comps[0], 0, (ncomp+1)*sizeof(comps[0]));
-
-        // count number of neighbors
-        for( i = 0; i < seq->total; i++ )
-        {
-            CvRect r1 = *(CvRect*)cvGetSeqElem( seq, i );
-            int idx = *(int*)cvGetSeqElem( idx_seq, i );
-            assert( (unsigned)idx < (unsigned)ncomp );
-
-            comps[idx].neighbors++;
-
-            comps[idx].rect.x += r1.x;
-            comps[idx].rect.y += r1.y;
-            comps[idx].rect.width += r1.width;
-            comps[idx].rect.height += r1.height;
-        }
-
-        // calculate average bounding box
-        for( i = 0; i < ncomp; i++ )
+        CvAvgComp result_comp = {{0,0,0,0},0};
+        
+        for( size_t i = 0; i < rectList.size(); i++ )
         {
-            int n = comps[i].neighbors;
-            if( n >= min_neighbors )
+            cv::Rect r = rectList[i];
+            if( r.area() > cv::Rect(result_comp.rect).area() )
             {
-                CvAvgComp comp;
-                comp.rect.x = (comps[i].rect.x*2 + n)/(2*n);
-                comp.rect.y = (comps[i].rect.y*2 + n)/(2*n);
-                comp.rect.width = (comps[i].rect.width*2 + n)/(2*n);
-                comp.rect.height = (comps[i].rect.height*2 + n)/(2*n);
-                comp.neighbors = comps[i].neighbors;
-
-                cvSeqPush( seq2, &comp );
+                result_comp.rect = r;
+                result_comp.neighbors = rweights[i];
             }
         }
-
-        if( !find_biggest_object )
-        {
-            // filter out small face rectangles inside large face rectangles
-            for( i = 0; i < seq2->total; i++ )
-            {
-                CvAvgComp r1 = *(CvAvgComp*)cvGetSeqElem( seq2, i );
-                int j, flag = 1;
-
-                for( j = 0; j < seq2->total; j++ )
-                {
-                    CvAvgComp r2 = *(CvAvgComp*)cvGetSeqElem( seq2, j );
-                    int distance = cvRound( r2.rect.width * 0.2 );
-
-                    if( i != j &&
-                        r1.rect.x >= r2.rect.x - distance &&
-                        r1.rect.y >= r2.rect.y - distance &&
-                        r1.rect.x + r1.rect.width <= r2.rect.x + r2.rect.width + distance &&
-                        r1.rect.y + r1.rect.height <= r2.rect.y + r2.rect.height + distance &&
-                        (r2.neighbors > MAX( 3, r1.neighbors ) || r1.neighbors < 3) )
-                    {
-                        flag = 0;
-                        break;
-                    }
-                }
-
-                if( flag )
-                    cvSeqPush( result_seq, &r1 );
-            }
-        }
-        else
+        cvSeqPush( result_seq, &result_comp );
+    }
+    else
+    {
+        for( size_t i = 0; i < rectList.size(); i++ )
         {
-            int max_area = 0;
-            for( i = 0; i < seq2->total; i++ )
-            {
-                CvAvgComp* comp = (CvAvgComp*)cvGetSeqElem( seq2, i );
-                int area = comp->rect.width * comp->rect.height;
-                if( max_area < area )
-                {
-                    max_area = area;
-                    result_comp = *comp;
-                }                
-            }
+            CvAvgComp c;
+            c.rect = rectList[i];
+            c.neighbors = rweights[i];
+            cvSeqPush( result_seq, &c );
         }
     }
 
-    if( find_biggest_object && result_comp.rect.width > 0 )
-        cvSeqPush( result_seq, &result_comp );
-
-    if( max_threads > 1 )
-           for( i = 0; i < max_threads; i++ )
-           {
-                   if( seq_thread[i] )
-                cvReleaseMemStorage( &seq_thread[i]->storage );
-           }
-
     return result_seq;
 }
 
index b9db898e36c8671868b07c9c43be21bb82428b3e..5061315c2ce49ce2580e891e83bce34b6b1c7f4b 100644 (file)
@@ -868,9 +868,10 @@ static void findStereoCorrespondenceBM( const Mat& left0, const Mat& right0, Mat
     \r
 #ifdef HAVE_TBB    \r
     const double SAD_overhead_coeff = 10.0;\r
-    double N0 = 100000 / (useShorts ? 1 : 4);  // approx tbb's min number instructions reasonable for one thread    \r
+    double N0 = 8000000 / (useShorts ? 1 : 4);  // approx tbb's min number instructions reasonable for one thread    \r
     double maxStripeSize = min(max(N0 / (width * ndisp), (wsz-1) * SAD_overhead_coeff), (double)height);\r
     int nstripes = cvCeil(height / maxStripeSize);\r
+    printf("nstripes=%d\n", nstripes);\r
 #else\r
     const int nstripes = 1;\r
 #endif\r
index f56112b4532b8782499d0d8caddbffba8090f887..db12bba6b89e90af0b22d9b18056f349353c97a5 100644 (file)
@@ -383,44 +383,303 @@ static CvSeq* icvFastHessianDetector( const CvMat* sum, const CvMat* mask_sum,
 }
 
 
+namespace cv
+{
+// Functor that assigns an orientation and, when a descriptor sequence is given, a descriptor to every keypoint in a BlockedRange.
+struct SURFInvoker
+{
+    enum { ORI_RADIUS = 6, ORI_WIN = 60, PATCH_SZ = 20 }; // gradient-sampling radius, orientation window (degrees), patch side
+    
+    static const int ORI_SEARCH_INC;  // increment of the orientation sliding window, in degrees
+    static const float ORI_SIGMA;     // sigma of the Gaussian weighting the orientation gradient samples
+    static const float DESC_SIGMA;    // sigma of the Gaussian weighting the descriptor gradient samples
+    // Stores the given pointers and nangle0 as-is; nothing is copied, so operator() dereferences the caller's objects.
+    SURFInvoker( const CvSURFParams* _params,
+                 CvSeq* _keypoints, CvSeq* _descriptors,
+                 const CvMat* _img, const CvMat* _sum, 
+                 const CvPoint* _apt, const float* _aptw,
+                 int _nangle0, const float* _DW )
+    {
+        params = _params;
+        keypoints = _keypoints;
+        descriptors = _descriptors;
+        img = _img;
+        sum = _sum;
+        apt = _apt;
+        aptw = _aptw;
+        nangle0 = _nangle0;
+        DW = _DW;
+    }
+    // Processes the keypoints with indices in [range.begin(), range.end()); all scratch buffers are locals of this call.
+    void operator()(const BlockedRange& range) const
+    {
+        /* X and Y gradient wavelet data */
+        const int NX=2, NY=2;
+        int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
+        int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
+        
+        const int descriptor_size = params->extended ? 128 : 64; // number of floats normalized per descriptor
+        
+        const int max_ori_samples = (2*ORI_RADIUS+1)*(2*ORI_RADIUS+1); // capacity of the orientation sample arrays
+        float X[max_ori_samples], Y[max_ori_samples], angle[max_ori_samples];
+        uchar PATCH[PATCH_SZ+1][PATCH_SZ+1];
+        float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
+        // CvMat headers over the local arrays declared above
+        CvMat _X = cvMat(1, max_ori_samples, CV_32F, X);
+        CvMat _Y = cvMat(1, max_ori_samples, CV_32F, Y);
+        CvMat _angle = cvMat(1, max_ori_samples, CV_32F, angle);
+        CvMat _patch = cvMat(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
+        
+        int k, k1 = range.begin(), k2 = range.end();
+        int maxSize = 0;
+        // Find the largest keypoint size in this range so that one window buffer fits every keypoint
+        for( k = k1; k < k2; k++ )
+            maxSize = std::max(maxSize, ((CvSURFPoint*)cvGetSeqElem( keypoints, k ))->size);
+        
+        maxSize = cvCeil((PATCH_SZ+1)*maxSize*1.2f/9.0f); // window side for that size (same formula as win_size below, rounded up)
+        Ptr<CvMat> winbuf = cvCreateMat( 1, maxSize*maxSize, CV_8U );
+        
+        for( k = k1; k < k2; k++ )
+        {
+            const int* sum_ptr = sum->data.i;
+            int sum_cols = sum->cols;
+            int i, j, kk, x, y, nangle;
+            
+            float* vec;
+            CvSurfHF dx_t[NX], dy_t[NY];
+            
+            CvSURFPoint* kp = (CvSURFPoint*)cvGetSeqElem( keypoints, k );
+            int size = kp->size;
+            CvPoint2D32f center = kp->pt;
+            
+            /* The sampling intervals and wavelet sizes for selecting an orientation
+             and building the keypoint descriptor are defined relative to 's' */
+            float s = (float)size*1.2f/9.0f;
+            
+            /* To find the dominant orientation, the gradients in x and y are
+             sampled in a circle of radius 6s using wavelets of size 4s.
+             We ensure the gradient wavelet size is even to ensure the 
+             wavelet pattern is balanced and symmetric around its center */
+            int grad_wav_size = 2*cvRound( 2*s );
+            if ( sum->rows < grad_wav_size || sum->cols < grad_wav_size )
+            {
+                /* when grad_wav_size is too big,
+                 * the sampling of gradient will be meaningless
+                 * mark keypoint for deletion. */
+                kp->size = -1;
+                continue;
+            }
+            icvResizeHaarPattern( dx_s, dx_t, NX, 4, grad_wav_size, sum->cols );
+            icvResizeHaarPattern( dy_s, dy_t, NY, 4, grad_wav_size, sum->cols );
+            for( kk = 0, nangle = 0; kk < nangle0; kk++ ) // one gradient sample per precomputed offset apt[kk]
+            {
+                const int* ptr;
+                float vx, vy;
+                x = cvRound( center.x + apt[kk].x*s - (float)(grad_wav_size-1)/2 );
+                y = cvRound( center.y + apt[kk].y*s - (float)(grad_wav_size-1)/2 );
+                if( (unsigned)y >= (unsigned)(sum->rows - grad_wav_size) ||
+                   (unsigned)x >= (unsigned)(sum->cols - grad_wav_size) )
+                    continue; // wavelet would not fit inside sum; the unsigned compare also rejects negative x/y
+                ptr = sum_ptr + x + y*sum_cols;
+                vx = icvCalcHaarPattern( ptr, dx_t, 2 );
+                vy = icvCalcHaarPattern( ptr, dy_t, 2 );
+                X[nangle] = vx*aptw[kk]; Y[nangle] = vy*aptw[kk];
+                nangle++;
+            }
+            if ( nangle == 0 )
+            {
+                /* No gradient could be sampled because the keypoint is too
+                 * near to one or more of the sides of the image. As we
+                 * therefore cannot find a dominant direction, we skip this
+                 * keypoint and mark it for later deletion from the sequence. */
+                kp->size = -1;
+                continue;
+            }
+            _X.cols = _Y.cols = _angle.cols = nangle; // restrict the headers to the samples actually collected
+            cvCartToPolar( &_X, &_Y, 0, &_angle, 1 ); // angle of each sample; compared against degrees below
+            // Slide a window of ORI_WIN degrees in ORI_SEARCH_INC steps and keep the one with the largest summed gradient
+            float bestx = 0, besty = 0, descriptor_mod = 0;
+            for( i = 0; i < 360; i += ORI_SEARCH_INC )
+            {
+                float sumx = 0, sumy = 0, temp_mod;
+                for( j = 0; j < nangle; j++ )
+                {
+                    int d = std::abs(cvRound(angle[j]) - i);
+                    if( d < ORI_WIN/2 || d > 360-ORI_WIN/2 ) // within half a window of i, including wrap-around at 360
+                    {
+                        sumx += X[j];
+                        sumy += Y[j];
+                    }
+                }
+                temp_mod = sumx*sumx + sumy*sumy;
+                if( temp_mod > descriptor_mod )
+                {
+                    descriptor_mod = temp_mod;
+                    bestx = sumx;
+                    besty = sumy;
+                }
+            }
+            
+            float descriptor_dir = cvFastArctan( besty, bestx );
+            kp->dir = descriptor_dir;
+            
+            if( !descriptors )
+                continue; // no descriptor sequence was given: only the orientation is computed
+            
+            descriptor_dir *= (float)(CV_PI/180); // degrees -> radians for sin()/cos() below
+            
+            /* Extract a window of pixels around the keypoint of size 20s */
+            int win_size = (int)((PATCH_SZ+1)*s);
+            CV_Assert( winbuf->cols >= win_size*win_size );
+            
+            CvMat win = cvMat(win_size, win_size, CV_8U, winbuf->data.ptr);
+            float sin_dir = sin(descriptor_dir);
+            float cos_dir = cos(descriptor_dir) ;
+            
+            /* Subpixel interpolation version (slower). Subpixel not required since
+             the pixels will all get averaged when we scale down to 20 pixels */
+            /*  
+             float w[] = { cos_dir, sin_dir, center.x,
+             -sin_dir, cos_dir , center.y };
+             CvMat W = cvMat(2, 3, CV_32F, w);
+             cvGetQuadrangleSubPix( img, &win, &W );
+             */
+            
+            /* Nearest neighbour version (faster) */
+            float win_offset = -(float)(win_size-1)/2;
+            float start_x = center.x + win_offset*cos_dir + win_offset*sin_dir;
+            float start_y = center.y - win_offset*sin_dir + win_offset*cos_dir;
+            uchar* WIN = win.data.ptr;
+            for( i = 0; i < win_size; i++, start_x += sin_dir, start_y += cos_dir )
+            {
+                float pixel_x = start_x;
+                float pixel_y = start_y;
+                for( j = 0; j < win_size; j++, pixel_x += cos_dir, pixel_y -= sin_dir )
+                {
+                    int x = std::min(std::max(cvRound(pixel_x), 0), img->cols-1);
+                    int y = std::min(std::max(cvRound(pixel_y), 0), img->rows-1);
+                    WIN[i*win_size + j] = img->data.ptr[y*img->step + x]; // x and y were clamped to the image bounds above
+                }
+            }
+            
+            /* Scale the window to size PATCH_SZ so each pixel's size is s. This
+             makes calculating the gradients with wavelets of size 2s easy */
+            cvResize( &win, &_patch, CV_INTER_AREA );
+            
+            /* Calculate gradients in x and y with wavelets of size 2s */
+            for( i = 0; i < PATCH_SZ; i++ )
+                for( j = 0; j < PATCH_SZ; j++ )
+                {
+                    float dw = DW[i*PATCH_SZ + j];
+                    float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
+                    float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
+                    DX[i][j] = vx;
+                    DY[i][j] = vy;
+                }
+            
+            /* Construct the descriptor */
+            vec = (float*)cvGetSeqElem( descriptors, k );
+            for( kk = 0; kk < (int)(descriptors->elem_size/sizeof(vec[0])); kk++ )
+                vec[kk] = 0;
+            double square_mag = 0; // sum of squared descriptor entries, used for the final normalization
+            if( params->extended )
+            {
+                /* 128-bin descriptor */
+                for( i = 0; i < 4; i++ )
+                    for( j = 0; j < 4; j++ )
+                    {
+                        for( y = i*5; y < i*5+5; y++ )
+                        {
+                            for( x = j*5; x < j*5+5; x++ )
+                            {
+                                float tx = DX[y][x], ty = DY[y][x];
+                                if( ty >= 0 )
+                                {
+                                    vec[0] += tx;
+                                    vec[1] += (float)fabs(tx);
+                                } else {
+                                    vec[2] += tx;
+                                    vec[3] += (float)fabs(tx);
+                                }
+                                if ( tx >= 0 )
+                                {
+                                    vec[4] += ty;
+                                    vec[5] += (float)fabs(ty);
+                                } else {
+                                    vec[6] += ty;
+                                    vec[7] += (float)fabs(ty);
+                                }
+                            }
+                        }
+                        for( kk = 0; kk < 8; kk++ )
+                            square_mag += vec[kk]*vec[kk];
+                        vec += 8; // advance to the 8 bins of the next 5x5 cell
+                    }
+            }
+            else
+            {
+                /* 64-bin descriptor */
+                for( i = 0; i < 4; i++ )
+                    for( j = 0; j < 4; j++ )
+                    {
+                        for( y = i*5; y < i*5+5; y++ )
+                        {
+                            for( x = j*5; x < j*5+5; x++ )
+                            {
+                                float tx = DX[y][x], ty = DY[y][x];
+                                vec[0] += tx; vec[1] += ty;
+                                vec[2] += (float)fabs(tx); vec[3] += (float)fabs(ty);
+                            }
+                        }
+                        for( kk = 0; kk < 4; kk++ )
+                            square_mag += vec[kk]*vec[kk];
+                        vec+=4; // advance to the 4 bins of the next 5x5 cell
+                    }
+            }
+            
+            /* unit vector is essential for contrast invariance */
+            vec = (float*)cvGetSeqElem( descriptors, k );
+            double scale = 1./(sqrt(square_mag) + DBL_EPSILON); // DBL_EPSILON keeps the division finite for an all-zero descriptor
+            for( kk = 0; kk < descriptor_size; kk++ )
+                vec[kk] = (float)(vec[kk]*scale);
+        }
+    }
+    // Inputs captured by the constructor; this struct has no destructor and releases none of them.
+    const CvSURFParams* params; // only params->extended is read here
+    const CvMat* img;           // pixels are read as single bytes through data.ptr
+    const CvMat* sum;           // read as int through data.i; presumably the integral image of img - confirm at the call site
+    CvSeq* keypoints;           // CvSURFPoint elements; dir is written, size is set to -1 to mark rejection
+    CvSeq* descriptors;         // may be null; otherwise element k receives the descriptor of keypoint k
+    const CvPoint* apt;         // nangle0 sampling offsets, scaled by s around the keypoint center
+    const float* aptw;          // weight applied to the gradient sampled at apt[kk]
+    int nangle0;                // number of entries read from apt and aptw
+    const float* DW;            // PATCH_SZ*PATCH_SZ descriptor weights, indexed as DW[i*PATCH_SZ + j]
+};
+     
+const int SURFInvoker::ORI_SEARCH_INC = 5;  // degrees per step of the orientation search
+const float SURFInvoker::ORI_SIGMA = 2.5f;  // float-valued: must not be copied into an int
+const float SURFInvoker::DESC_SIGMA = 3.3f;
+    
+}
+
+
 CV_IMPL void
 cvExtractSURF( const CvArr* _img, const CvArr* _mask,
                CvSeq** _keypoints, CvSeq** _descriptors,
                CvMemStorage* storage, CvSURFParams params,
                           int useProvidedKeyPts)
 {
-    CvMat *sum = 0, *mask1 = 0, *mask_sum = 0, **win_bufs = 0;
+    const int ORI_RADIUS = cv::SURFInvoker::ORI_RADIUS;
+    const int ORI_SIGMA = cv::SURFInvoker::ORI_SIGMA;
+    const float DESC_SIGMA = cv::SURFInvoker::DESC_SIGMA;
+    
+    CvMat *sum = 0, *mask1 = 0, *mask_sum = 0;
 
     if( _keypoints && !useProvidedKeyPts ) // If useProvidedKeyPts!=0 we'll use current contents of "*_keypoints"
         *_keypoints = 0;
     if( _descriptors )
         *_descriptors = 0;
 
-    /* Radius of the circle in which to sample gradients to assign an 
-       orientation */
-    const int ORI_RADIUS = 6; 
-
-    /* The size of the sliding window (in degrees) used to assign an 
-       orientation */
-    const int ORI_WIN = 60;   
-
-    /* Increment used for the orientation sliding window (in degrees) */
-    const int ORI_SEARCH_INC = 5;  
-
-    /* Standard deviation of the Gaussian used to weight the gradient samples
-       used to assign an orientation */ 
-    const float ORI_SIGMA = 2.5f;
-
-    /* Standard deviation of the Gaussian used to weight the gradient samples
-       used to build a keypoint descriptor */
-    const float DESC_SIGMA = 3.3f;
-
-
-    /* X and Y gradient wavelet data */
-    const int NX=2, NY=2;
-    int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
-    int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
-
     CvSeq *keypoints, *descriptors = 0;
     CvMat imghdr, *img = cvGetMat(_img, &imghdr);
     CvMat maskhdr, *mask = _mask ? cvGetMat(_mask, &maskhdr) : 0;
@@ -432,9 +691,8 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
     float DW[PATCH_SZ][PATCH_SZ];
     CvMat _DW = cvMat(PATCH_SZ, PATCH_SZ, CV_32F, DW);
     CvPoint apt[max_ori_samples];
-    float apt_w[max_ori_samples];
-    int i, j, k, nangle0 = 0, N;
-    int nthreads = cvGetNumThreads();
+    float aptw[max_ori_samples];
+    int i, j, nangle0 = 0, N;
 
     CV_Assert(img != 0);
     CV_Assert(CV_MAT_TYPE(img->type) == CV_8UC1);
@@ -444,7 +702,7 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
     CV_Assert(params.nOctaves > 0);
     CV_Assert(params.nOctaveLayers > 0);
 
-    sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
+    sum = cvCreateMat( img->rows+1, img->cols+1, CV_32SC1 );
     cvIntegral( img, sum );
        
        // Compute keypoints only if we are not asked for evaluating the descriptors are some given locations:
@@ -484,13 +742,12 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
             if( i*i + j*j <= ORI_RADIUS*ORI_RADIUS )
             {
                 apt[nangle0] = cvPoint(j,i);
-                apt_w[nangle0++] = G[i+ORI_RADIUS]*G[j+ORI_RADIUS];
+                aptw[nangle0++] = G[i+ORI_RADIUS]*G[j+ORI_RADIUS];
             }
         }
     }
 
     /* Gaussian used to weight descriptor samples */
-    {
     double c2 = 1./(DESC_SIGMA*DESC_SIGMA*2);
     double gs = 0;
     for( i = 0; i < PATCH_SZ; i++ )
@@ -504,234 +761,12 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
         }
     }
     cvScale( &_DW, &_DW, 1./gs );
-    }
-
-    win_bufs = (CvMat**)cvAlloc(nthreads*sizeof(win_bufs[0]));
-    for( i = 0; i < nthreads; i++ )
-        win_bufs[i] = 0;
-
-#ifdef _OPENMP
-#pragma omp parallel for num_threads(nthreads) schedule(dynamic)
-#endif
-    for( k = 0; k < N; k++ )
-    {
-        const int* sum_ptr = sum->data.i;
-        int sum_cols = sum->cols;
-        int i, j, kk, x, y, nangle;
-        float X[max_ori_samples], Y[max_ori_samples], angle[max_ori_samples];
-        uchar PATCH[PATCH_SZ+1][PATCH_SZ+1];
-        float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
-        CvMat _X = cvMat(1, max_ori_samples, CV_32F, X);
-        CvMat _Y = cvMat(1, max_ori_samples, CV_32F, Y);
-        CvMat _angle = cvMat(1, max_ori_samples, CV_32F, angle);
-        CvMat _patch = cvMat(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
-        float* vec;
-        CvSurfHF dx_t[NX], dy_t[NY];
-        int thread_idx = cvGetThreadNum();
-        
-        CvSURFPoint* kp = (CvSURFPoint*)cvGetSeqElem( keypoints, k );
-        int size = kp->size;
-        CvPoint2D32f center = kp->pt;
-
-        /* The sampling intervals and wavelet sized for selecting an orientation
-           and building the keypoint descriptor are defined relative to 's' */
-        float s = (float)size*1.2f/9.0f;
-
-        /* To find the dominant orientation, the gradients in x and y are
-           sampled in a circle of radius 6s using wavelets of size 4s.
-           We ensure the gradient wavelet size is even to ensure the 
-           wavelet pattern is balanced and symmetric around its center */
-        int grad_wav_size = 2*cvRound( 2*s );
-        if ( sum->rows < grad_wav_size || sum->cols < grad_wav_size )
-        {
-            /* when grad_wav_size is too big,
-            * the sampling of gradient will be meaningless
-            * mark keypoint for deletion. */
-            kp->size = -1;
-            continue;
-        }
-        icvResizeHaarPattern( dx_s, dx_t, NX, 4, grad_wav_size, sum->cols );
-        icvResizeHaarPattern( dy_s, dy_t, NY, 4, grad_wav_size, sum->cols );
-        for( kk = 0, nangle = 0; kk < nangle0; kk++ )
-        {
-            const int* ptr;
-            float vx, vy;
-            x = cvRound( center.x + apt[kk].x*s - (float)(grad_wav_size-1)/2 );
-            y = cvRound( center.y + apt[kk].y*s - (float)(grad_wav_size-1)/2 );
-            if( (unsigned)y >= (unsigned)(sum->rows - grad_wav_size) ||
-                (unsigned)x >= (unsigned)(sum->cols - grad_wav_size) )
-                continue;
-            ptr = sum_ptr + x + y*sum_cols;
-            vx = icvCalcHaarPattern( ptr, dx_t, 2 );
-            vy = icvCalcHaarPattern( ptr, dy_t, 2 );
-            X[nangle] = vx*apt_w[kk]; Y[nangle] = vy*apt_w[kk];
-            nangle++;
-        }
-        if ( nangle == 0 )
-        {
-            /* No gradient could be sampled because the keypoint is too
-            * near too one or more of the sides of the image. As we
-            * therefore cannot find a dominant direction, we skip this
-            * keypoint and mark it for later deletion from the sequence. */
-            kp->size = -1;
-            continue;
-        }
-        _X.cols = _Y.cols = _angle.cols = nangle;
-        cvCartToPolar( &_X, &_Y, 0, &_angle, 1 );
-
-        float bestx = 0, besty = 0, descriptor_mod = 0;
-        for( i = 0; i < 360; i += ORI_SEARCH_INC )
-        {
-            float sumx = 0, sumy = 0, temp_mod;
-            for( j = 0; j < nangle; j++ )
-            {
-                int d = abs(cvRound(angle[j]) - i);
-                if( d < ORI_WIN/2 || d > 360-ORI_WIN/2 )
-                {
-                    sumx += X[j];
-                    sumy += Y[j];
-                }
-            }
-            temp_mod = sumx*sumx + sumy*sumy;
-            if( temp_mod > descriptor_mod )
-            {
-                descriptor_mod = temp_mod;
-                bestx = sumx;
-                besty = sumy;
-            }
-        }
-        
-        float descriptor_dir = cvFastArctan( besty, bestx );
-        kp->dir = descriptor_dir;
-
-        if( !_descriptors )
-            continue;
-
-        descriptor_dir *= (float)(CV_PI/180);
-        
-        /* Extract a window of pixels around the keypoint of size 20s */
-        int win_size = (int)((PATCH_SZ+1)*s);
-        if( win_bufs[thread_idx] == 0 || win_bufs[thread_idx]->cols < win_size*win_size )
-        {
-            cvReleaseMat( &win_bufs[thread_idx] );
-            win_bufs[thread_idx] = cvCreateMat( 1, win_size*win_size, CV_8U );
-        }
-        
-        CvMat win = cvMat(win_size, win_size, CV_8U, win_bufs[thread_idx]->data.ptr);
-        float sin_dir = sin(descriptor_dir);
-        float cos_dir = cos(descriptor_dir) ;
-
-        /* Subpixel interpolation version (slower). Subpixel not required since
-           the pixels will all get averaged when we scale down to 20 pixels */
-        /*  
-        float w[] = { cos_dir, sin_dir, center.x,
-                      -sin_dir, cos_dir , center.y };
-        CvMat W = cvMat(2, 3, CV_32F, w);
-        cvGetQuadrangleSubPix( img, &win, &W );
-        */
-
-        /* Nearest neighbour version (faster) */
-        float win_offset = -(float)(win_size-1)/2;
-        float start_x = center.x + win_offset*cos_dir + win_offset*sin_dir;
-        float start_y = center.y - win_offset*sin_dir + win_offset*cos_dir;
-        uchar* WIN = win.data.ptr;
-        for( i=0; i<win_size; i++, start_x+=sin_dir, start_y+=cos_dir )
-        {
-            float pixel_x = start_x;
-            float pixel_y = start_y;
-            for( j=0; j<win_size; j++, pixel_x+=cos_dir, pixel_y-=sin_dir )
-            {
-                int x = cvRound( pixel_x );
-                int y = cvRound( pixel_y );
-                x = MAX( x, 0 );
-                y = MAX( y, 0 );
-                x = MIN( x, img->cols-1 );
-                y = MIN( y, img->rows-1 );
-                WIN[i*win_size + j] = img->data.ptr[y*img->step+x];
-             }
-        }
-
-        /* Scale the window to size PATCH_SZ so each pixel's size is s. This
-           makes calculating the gradients with wavelets of size 2s easy */
-        cvResize( &win, &_patch, CV_INTER_AREA );
-
-        /* Calculate gradients in x and y with wavelets of size 2s */
-        for( i = 0; i < PATCH_SZ; i++ )
-            for( j = 0; j < PATCH_SZ; j++ )
-            {
-                float dw = DW[i][j];
-                float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
-                float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
-                DX[i][j] = vx;
-                DY[i][j] = vy;
-            }
-
-        /* Construct the descriptor */
-        vec = (float*)cvGetSeqElem( descriptors, k );
-        for( kk = 0; kk < (int)(descriptors->elem_size/sizeof(vec[0])); kk++ )
-            vec[kk] = 0;
-        double square_mag = 0;       
-        if( params.extended )
-        {
-            /* 128-bin descriptor */
-            for( i = 0; i < 4; i++ )
-                for( j = 0; j < 4; j++ )
-                {
-                    for( y = i*5; y < i*5+5; y++ )
-                    {
-                        for( x = j*5; x < j*5+5; x++ )
-                        {
-                            float tx = DX[y][x], ty = DY[y][x];
-                            if( ty >= 0 )
-                            {
-                                vec[0] += tx;
-                                vec[1] += (float)fabs(tx);
-                            } else {
-                                vec[2] += tx;
-                                vec[3] += (float)fabs(tx);
-                            }
-                            if ( tx >= 0 )
-                            {
-                                vec[4] += ty;
-                                vec[5] += (float)fabs(ty);
-                            } else {
-                                vec[6] += ty;
-                                vec[7] += (float)fabs(ty);
-                            }
-                        }
-                    }
-                    for( kk = 0; kk < 8; kk++ )
-                        square_mag += vec[kk]*vec[kk];
-                    vec += 8;
-                }
-        }
-        else
-        {
-            /* 64-bin descriptor */
-            for( i = 0; i < 4; i++ )
-                for( j = 0; j < 4; j++ )
-                {
-                    for( y = i*5; y < i*5+5; y++ )
-                    {
-                        for( x = j*5; x < j*5+5; x++ )
-                        {
-                            float tx = DX[y][x], ty = DY[y][x];
-                            vec[0] += tx; vec[1] += ty;
-                            vec[2] += (float)fabs(tx); vec[3] += (float)fabs(ty);
-                        }
-                    }
-                    for( kk = 0; kk < 4; kk++ )
-                        square_mag += vec[kk]*vec[kk];
-                    vec+=4;
-                }
-        }
 
-        /* unit vector is essential for contrast invariance */
-        vec = (float*)cvGetSeqElem( descriptors, k );
-        double scale = 1./(sqrt(square_mag) + DBL_EPSILON);
-        for( kk = 0; kk < descriptor_size; kk++ )
-            vec[kk] = (float)(vec[kk]*scale);
-    }
+    cv::parallel_for(cv::BlockedRange(0, N),
+                     cv::SURFInvoker(&params, keypoints, descriptors, img, sum,
+                                     apt, aptw, nangle0, &DW[0][0]));
+    //cv::SURFInvoker(&params, keypoints, descriptors, img, sum,
+    //                apt, aptw, nangle0, &DW[0][0])(cv::BlockedRange(0, N));
     
     /* remove keypoints that were marked for deletion */
     for ( i = 0; i < N; i++ )
@@ -747,9 +782,6 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
         }
     }
 
-    for( i = 0; i < nthreads; i++ )
-        cvReleaseMat( &win_bufs[i] );
-
     if( _keypoints && !useProvidedKeyPts )
         *_keypoints = keypoints;
     if( _descriptors )
@@ -758,7 +790,6 @@ cvExtractSURF( const CvArr* _img, const CvArr* _mask,
     cvReleaseMat( &sum );
     if (mask1) cvReleaseMat( &mask1 );
     if (mask_sum) cvReleaseMat( &mask_sum );
-    cvFree( &win_bufs );
 }
 
 
index 337e3f0ed7711a3965a2a98a14f325962e46ca0c..2dd4bf79e6e98606b45b30f4a7794d06e56229ff 100644 (file)
@@ -154,6 +154,7 @@ int getTrackbarPos( const string& trackbarName, const string& winName )
 #if   defined WIN32 || defined _WIN32         // see window_w32.cpp
 #elif defined (HAVE_GTK)      // see window_gtk.cpp
 #elif defined (HAVE_COCOA)   // see window_carbon.cpp
+#elif defined (HAVE_CARBON)
 
 
 #else
index 43aa7e93ea627f35ac5463c560a545399594c84c..1f9ce0af0d4acffe33e64a486bde78461bf6c631 100644 (file)
@@ -92,11 +92,12 @@ static bool wasInitialized = false;
 
 static void icvCocoaCleanup(void)
 {
-    if( application )
+    /*if( application )
     {
         [application terminate:nil];
+        application = 0;
         [pool release];
-    }
+    }*/
 }
 
 CV_IMPL int cvInitSystem( int argc, char** argv)