From: vp153 Date: Thu, 25 Mar 2010 00:33:27 +0000 (+0000) Subject: converted HOG & LKPyramid tracker from OpenMP to TBB. OpenMP is now commented out... X-Git-Url: https://rtime.felk.cvut.cz/gitweb/opencv.git/commitdiff_plain/e79c7fa63cea3883a1c1fc48694b190b7c1d85c0?ds=sidebyside converted HOG & LKPyramid tracker from OpenMP to TBB. OpenMP is now commented out in CMake. git-svn-id: https://code.ros.org/svn/opencv/trunk@2903 73c94f0f-984f-4a5f-82bc-2d8db8d8ee08 --- diff --git a/opencv/CMakeLists.txt b/opencv/CMakeLists.txt index b0be94e6..5223ce4f 100644 --- a/opencv/CMakeLists.txt +++ b/opencv/CMakeLists.txt @@ -203,12 +203,12 @@ if(UNIX) include(CheckIncludeFile) endif() -if(MSVC) - set(DEFAULT_ENABLE_OPENMP ON) -else() - set(DEFAULT_ENABLE_OPENMP OFF) -endif() -set(ENABLE_OPENMP ${DEFAULT_ENABLE_OPENMP} CACHE BOOL "") +#if(MSVC) +# set(DEFAULT_ENABLE_OPENMP ON) +#else() +# set(DEFAULT_ENABLE_OPENMP OFF) +#endif() +#set(ENABLE_OPENMP ${DEFAULT_ENABLE_OPENMP} CACHE BOOL "") if(CMAKE_COMPILER_IS_GNUCXX) set(ENABLE_PROFILING OFF CACHE BOOL "Enable profiling in the GCC compiler (Add flags: -g -pg)") @@ -658,9 +658,9 @@ if(MSVC) set(EXTRA_EXE_LINKER_FLAGS_RELEASE "${EXTRA_EXE_LINKER_FLAGS_RELEASE} /debug") - if(ENABLE_OPENMP) - set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} /openmp") - endif() + #if(ENABLE_OPENMP) + # set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} /openmp") + #endif() # Remove unreferenced functions: function level linking set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} /Gy") @@ -754,10 +754,10 @@ if(CMAKE_COMPILER_IS_GNUCXX) endif() # Parallel mode - if(ENABLE_OPENMP) - set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -D_GLIBCXX_PARALLEL -fopenmp") - set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} gomp) - endif() + #if(ENABLE_OPENMP) + # set(EXTRA_C_FLAGS "${EXTRA_C_FLAGS} -D_GLIBCXX_PARALLEL -fopenmp") + # set(OPENCV_LINKER_LIBS ${OPENCV_LINKER_LIBS} gomp) + #endif() set(EXTRA_C_FLAGS_RELEASE "${EXTRA_C_FLAGS_RELEASE} -DNDEBUG") set(EXTRA_C_FLAGS_DEBUG 
"${EXTRA_C_FLAGS_DEBUG} -O0 -DDEBUG -D_DEBUG") diff --git a/opencv/samples/c/peopledetect.cpp b/opencv/samples/c/peopledetect.cpp index 3c516ae0..9e4ec3be 100644 --- a/opencv/samples/c/peopledetect.cpp +++ b/opencv/samples/c/peopledetect.cpp @@ -36,6 +36,7 @@ int main(int argc, char** argv) HOGDescriptor hog; hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector()); + namedWindow("people detector", 1); for(;;) { diff --git a/opencv/src/cv/cvlkpyramid.cpp b/opencv/src/cv/cvlkpyramid.cpp index 1b735bf8..ffda27ae 100644 --- a/opencv/src/cv/cvlkpyramid.cpp +++ b/opencv/src/cv/cvlkpyramid.cpp @@ -391,7 +391,7 @@ icvInitPyramidalAlgorithm( const CvMat* imgA, const CvMat* imgB, CV_Error( CV_StsNullPtr, "Some of the precomputed pyramids are missing" ); if( level < 0 ) - CV_Error( CV_StsOutOfRange, "The number of pyramid layers is negative" ); + CV_Error( CV_StsOutOfRange, "The number of pyramid levels is negative" ); switch( criteria->type ) { @@ -542,147 +542,50 @@ icvCalcIxIy_32f( const float* src, int src_step, float* dstX, float* dstY, int d } -CV_IMPL void -cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, - void* pyrarrA, void* pyrarrB, - const CvPoint2D32f * featuresA, - CvPoint2D32f * featuresB, - int count, CvSize winSize, int level, - char *status, float *error, - CvTermCriteria criteria, int flags ) +namespace cv { - cv::AutoBuffer pyrBuffer; - cv::AutoBuffer buffer; - cv::AutoBuffer _status; - - const int MAX_ITERS = 100; - - CvMat stubA, *imgA = (CvMat*)arrA; - CvMat stubB, *imgB = (CvMat*)arrB; - CvMat pstubA, *pyrA = (CvMat*)pyrarrA; - CvMat pstubB, *pyrB = (CvMat*)pyrarrB; - CvSize imgSize; - static const float smoothKernel[] = { 0.09375, 0.3125, 0.09375 }; /* 3/32, 10/32, 3/32 */ - - int bufferBytes = 0; - uchar **imgI = 0; - uchar **imgJ = 0; - int *step = 0; - double *scale = 0; - CvSize* size = 0; - - int threadCount = cvGetNumThreads(); - float* _patchI[CV_MAX_THREADS]; - float* _patchJ[CV_MAX_THREADS]; - float* _Ix[CV_MAX_THREADS]; - 
float* _Iy[CV_MAX_THREADS]; - - int i, l; - - CvSize patchSize = cvSize( winSize.width * 2 + 1, winSize.height * 2 + 1 ); - int patchLen = patchSize.width * patchSize.height; - int srcPatchLen = (patchSize.width + 2)*(patchSize.height + 2); - - imgA = cvGetMat( imgA, &stubA ); - imgB = cvGetMat( imgB, &stubB ); - - if( CV_MAT_TYPE( imgA->type ) != CV_8UC1 ) - CV_Error( CV_StsUnsupportedFormat, "" ); - - if( !CV_ARE_TYPES_EQ( imgA, imgB )) - CV_Error( CV_StsUnmatchedFormats, "" ); - - if( !CV_ARE_SIZES_EQ( imgA, imgB )) - CV_Error( CV_StsUnmatchedSizes, "" ); - - if( imgA->step != imgB->step ) - CV_Error( CV_StsUnmatchedSizes, "imgA and imgB must have equal steps" ); - - imgSize = cvGetMatSize( imgA ); - - if( pyrA ) - { - pyrA = cvGetMat( pyrA, &pstubA ); - - if( pyrA->step*pyrA->height < icvMinimalPyramidSize( imgSize ) ) - CV_Error( CV_StsBadArg, "pyramid A has insufficient size" ); - } - else - { - pyrA = &pstubA; - pyrA->data.ptr = 0; - } - - if( pyrB ) - { - pyrB = cvGetMat( pyrB, &pstubB ); - - if( pyrB->step*pyrB->height < icvMinimalPyramidSize( imgSize ) ) - CV_Error( CV_StsBadArg, "pyramid B has insufficient size" ); - } - else - { - pyrB = &pstubB; - pyrB->data.ptr = 0; - } - - if( count == 0 ) - return; - if( !featuresA || !featuresB ) - CV_Error( CV_StsNullPtr, "Some of arrays of point coordinates are missing" ); - - if( count < 0 ) - CV_Error( CV_StsOutOfRange, "The number of tracked points is negative or zero" ); - - if( winSize.width <= 1 || winSize.height <= 1 ) - CV_Error( CV_StsBadSize, "Invalid search window size" ); - - for( i = 0; i < threadCount; i++ ) - _patchI[i] = _patchJ[i] = _Ix[i] = _Iy[i] = 0; - - icvInitPyramidalAlgorithm( imgA, imgB, pyrA, pyrB, - level, &criteria, MAX_ITERS, flags, - &imgI, &imgJ, &step, &size, &scale, &pyrBuffer ); - - if( !status ) +struct LKTrackerInvoker +{ + LKTrackerInvoker( const CvMat* _imgI, const CvMat* _imgJ, + const CvPoint2D32f* _featuresA, + CvPoint2D32f* _featuresB, + char* _status, float* _error, + 
CvTermCriteria _criteria, + CvSize _winSize, int _level, int _flags ) { - _status.allocate(count); + imgI = _imgI; + imgJ = _imgJ; + featuresA = _featuresA; + featuresB = _featuresB; status = _status; + error = _error; + criteria = _criteria; + winSize = _winSize; + level = _level; + flags = _flags; } - - /* buffer_size = + */ - bufferBytes = (srcPatchLen + patchLen * 3) * sizeof( _patchI[0][0] ) * threadCount; - buffer.allocate(bufferBytes); - - for( i = 0; i < threadCount; i++ ) - { - _patchI[i] = i == 0 ? (float*)(uchar*)buffer : _Iy[i-1] + patchLen; - _patchJ[i] = _patchI[i] + srcPatchLen; - _Ix[i] = _patchJ[i] + patchLen; - _Iy[i] = _Ix[i] + patchLen; - } - - memset( status, 1, count ); - if( error ) - memset( error, 0, count*sizeof(error[0]) ); - - if( !(flags & CV_LKFLOW_INITIAL_GUESSES) ) - memcpy( featuresB, featuresA, count*sizeof(featuresA[0])); - - /* do processing from top pyramid level (smallest image) - to the bottom (original image) */ - for( l = level; l >= 0; l-- ) + + void operator()(const BlockedRange& range) const { - CvSize levelSize = size[l]; - int levelStep = step[l]; - - { -#ifdef _OPENMP - #pragma omp parallel for num_threads(threadCount) schedule(dynamic) -#endif // _OPENMP - /* find flow for each given point */ - for( i = 0; i < count; i++ ) + static const float smoothKernel[] = { 0.09375, 0.3125, 0.09375 }; // 3/32, 10/32, 3/32 + + int i, i1 = range.begin(), i2 = range.end(); + + CvSize patchSize = cvSize( winSize.width * 2 + 1, winSize.height * 2 + 1 ); + int patchLen = patchSize.width * patchSize.height; + int srcPatchLen = (patchSize.width + 2)*(patchSize.height + 2); + + AutoBuffer buf(patchLen*3 + srcPatchLen); + float* patchI = buf; + float* patchJ = patchI + srcPatchLen; + float* Ix = patchJ + patchLen; + float* Iy = Ix + patchLen; + float scaleL = 1.f/(1 << level); + CvSize levelSize = cvGetMatSize(imgI); + + // find flow for each given point + for( i = i1; i < i2; i++ ) { CvPoint2D32f v; CvPoint minI, maxI, minJ, maxJ; @@ 
-693,91 +596,76 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, double Gxx = 0, Gxy = 0, Gyy = 0, D = 0, minEig = 0; float prev_mx = 0, prev_my = 0; int j, x, y; - int threadIdx = cvGetThreadNum(); - float* patchI = _patchI[threadIdx]; - float* patchJ = _patchJ[threadIdx]; - float* Ix = _Ix[threadIdx]; - float* Iy = _Iy[threadIdx]; - - v.x = featuresB[i].x; - v.y = featuresB[i].y; - if( l < level ) - { - v.x += v.x; - v.y += v.y; - } - else - { - v.x = (float)(v.x * scale[l]); - v.y = (float)(v.y * scale[l]); - } - + + v.x = featuresB[i].x*2; + v.y = featuresB[i].y*2; + pt_status = status[i]; if( !pt_status ) continue; - - minI = maxI = minJ = maxJ = cvPoint( 0, 0 ); - - u.x = (float) (featuresA[i].x * scale[l]); - u.y = (float) (featuresA[i].y * scale[l]); - + + minI = maxI = minJ = maxJ = cvPoint(0, 0); + + u.x = featuresA[i].x * scaleL; + u.y = featuresA[i].y * scaleL; + intersect( u, winSize, levelSize, &minI, &maxI ); isz = jsz = cvSize(maxI.x - minI.x + 2, maxI.y - minI.y + 2); u.x += (minI.x - (patchSize.width - maxI.x + 1))*0.5f; u.y += (minI.y - (patchSize.height - maxI.y + 1))*0.5f; - + if( isz.width < 3 || isz.height < 3 || - icvGetRectSubPix_8u32f_C1R( imgI[l], levelStep, levelSize, - patchI, isz.width*sizeof(patchI[0]), isz, u ) < 0 ) + icvGetRectSubPix_8u32f_C1R( imgI->data.ptr, imgI->step, levelSize, + patchI, isz.width*sizeof(patchI[0]), isz, u ) < 0 ) { - /* point is outside the image. take the next */ + // point is outside the first image. 
take the next status[i] = 0; continue; } - + icvCalcIxIy_32f( patchI, isz.width*sizeof(patchI[0]), Ix, Iy, - (isz.width-2)*sizeof(patchI[0]), isz, smoothKernel, patchJ ); - + (isz.width-2)*sizeof(patchI[0]), isz, smoothKernel, patchJ ); + for( j = 0; j < criteria.max_iter; j++ ) { double bx = 0, by = 0; float mx, my; CvPoint2D32f _v; - + intersect( v, winSize, levelSize, &minJ, &maxJ ); - + minJ.x = MAX( minJ.x, minI.x ); minJ.y = MAX( minJ.y, minI.y ); - + maxJ.x = MIN( maxJ.x, maxI.x ); maxJ.y = MIN( maxJ.y, maxI.y ); - + jsz = cvSize(maxJ.x - minJ.x, maxJ.y - minJ.y); - + _v.x = v.x + (minJ.x - (patchSize.width - maxJ.x + 1))*0.5f; _v.y = v.y + (minJ.y - (patchSize.height - maxJ.y + 1))*0.5f; - + if( jsz.width < 1 || jsz.height < 1 || - icvGetRectSubPix_8u32f_C1R( imgJ[l], levelStep, levelSize, patchJ, + icvGetRectSubPix_8u32f_C1R( imgJ->data.ptr, imgJ->step, levelSize, patchJ, jsz.width*sizeof(patchJ[0]), jsz, _v ) < 0 ) { - /* point is outside image. take the next */ + // point is outside of the second image. 
take the next pt_status = 0; break; } - + if( maxJ.x == prev_maxJ.x && maxJ.y == prev_maxJ.y && minJ.x == prev_minJ.x && minJ.y == prev_minJ.y ) { for( y = 0; y < jsz.height; y++ ) { const float* pi = patchI + - (y + minJ.y - minI.y + 1)*isz.width + minJ.x - minI.x + 1; + (y + minJ.y - minI.y + 1)*isz.width + minJ.x - minI.x + 1; const float* pj = patchJ + y*jsz.width; const float* ix = Ix + - (y + minJ.y - minI.y)*(isz.width-2) + minJ.x - minI.x; + (y + minJ.y - minI.y)*(isz.width-2) + minJ.x - minI.x; const float* iy = Iy + (ix - Ix); - + for( x = 0; x < jsz.width; x++ ) { double t0 = pi[x] - pj[x]; @@ -792,12 +680,12 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, for( y = 0; y < jsz.height; y++ ) { const float* pi = patchI + - (y + minJ.y - minI.y + 1)*isz.width + minJ.x - minI.x + 1; + (y + minJ.y - minI.y + 1)*isz.width + minJ.x - minI.x + 1; const float* pj = patchJ + y*jsz.width; const float* ix = Ix + - (y + minJ.y - minI.y)*(isz.width-2) + minJ.x - minI.x; + (y + minJ.y - minI.y)*(isz.width-2) + minJ.x - minI.x; const float* iy = Iy + (ix - Ix); - + for( x = 0; x < jsz.width; x++ ) { double t = pi[x] - pj[x]; @@ -808,33 +696,33 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, Gyy += iy[x] * iy[x]; } } - + D = Gxx * Gyy - Gxy * Gxy; if( D < DBL_EPSILON ) { pt_status = 0; break; } - + // Adi Shavit - 2008.05 if( flags & CV_LKFLOW_GET_MIN_EIGENVALS ) minEig = (Gyy + Gxx - sqrt((Gxx-Gyy)*(Gxx-Gyy) + 4.*Gxy*Gxy))/(2*jsz.height*jsz.width); - + D = 1. 
/ D; - + prev_minJ = minJ; prev_maxJ = maxJ; } - + mx = (float) ((Gyy * bx - Gxy * by) * D); my = (float) ((Gxx * by - Gxy * bx) * D); - + v.x += mx; v.y += my; - + if( mx * mx + my * my < criteria.epsilon ) break; - + if( j > 0 && fabs(mx + prev_mx) < 0.01 && fabs(my + prev_my) < 0.01 ) { v.x -= mx*0.5f; @@ -844,12 +732,12 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, prev_mx = mx; prev_my = my; } - + featuresB[i] = v; status[i] = (char)pt_status; - if( l == 0 && error && pt_status ) + if( level == 0 && error && pt_status ) { - /* calc error */ + // calc error double err = 0; if( flags & CV_LKFLOW_GET_MIN_EIGENVALS ) err = minEig; @@ -858,9 +746,9 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, for( y = 0; y < jsz.height; y++ ) { const float* pi = patchI + - (y + minJ.y - minI.y + 1)*isz.width + minJ.x - minI.x + 1; + (y + minJ.y - minI.y + 1)*isz.width + minJ.x - minI.x + 1; const float* pj = patchJ + y*jsz.width; - + for( x = 0; x < jsz.width; x++ ) { double t = pi[x] - pj[x]; @@ -872,7 +760,143 @@ cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, error[i] = (float)err; } } // end of point processing loop (i) - } + } + + const CvMat* imgI; + const CvMat* imgJ; + const CvPoint2D32f* featuresA; + CvPoint2D32f* featuresB; + char* status; + float* error; + CvTermCriteria criteria; + CvSize winSize; + int level; + int flags; +}; + + +} + + +CV_IMPL void +cvCalcOpticalFlowPyrLK( const void* arrA, const void* arrB, + void* pyrarrA, void* pyrarrB, + const CvPoint2D32f * featuresA, + CvPoint2D32f * featuresB, + int count, CvSize winSize, int level, + char *status, float *error, + CvTermCriteria criteria, int flags ) +{ + cv::AutoBuffer pyrBuffer; + cv::AutoBuffer buffer; + cv::AutoBuffer _status; + + const int MAX_ITERS = 100; + + CvMat stubA, *imgA = (CvMat*)arrA; + CvMat stubB, *imgB = (CvMat*)arrB; + CvMat pstubA, *pyrA = (CvMat*)pyrarrA; + CvMat pstubB, *pyrB = (CvMat*)pyrarrB; + CvSize imgSize; + + uchar **imgI = 0; + uchar 
**imgJ = 0; + int *step = 0; + double *scale = 0; + CvSize* size = 0; + + int i, l; + + imgA = cvGetMat( imgA, &stubA ); + imgB = cvGetMat( imgB, &stubB ); + + if( CV_MAT_TYPE( imgA->type ) != CV_8UC1 ) + CV_Error( CV_StsUnsupportedFormat, "" ); + + if( !CV_ARE_TYPES_EQ( imgA, imgB )) + CV_Error( CV_StsUnmatchedFormats, "" ); + + if( !CV_ARE_SIZES_EQ( imgA, imgB )) + CV_Error( CV_StsUnmatchedSizes, "" ); + + if( imgA->step != imgB->step ) + CV_Error( CV_StsUnmatchedSizes, "imgA and imgB must have equal steps" ); + + imgSize = cvGetMatSize( imgA ); + + if( pyrA ) + { + pyrA = cvGetMat( pyrA, &pstubA ); + + if( pyrA->step*pyrA->height < icvMinimalPyramidSize( imgSize ) ) + CV_Error( CV_StsBadArg, "pyramid A has insufficient size" ); + } + else + { + pyrA = &pstubA; + pyrA->data.ptr = 0; + } + + if( pyrB ) + { + pyrB = cvGetMat( pyrB, &pstubB ); + + if( pyrB->step*pyrB->height < icvMinimalPyramidSize( imgSize ) ) + CV_Error( CV_StsBadArg, "pyramid B has insufficient size" ); + } + else + { + pyrB = &pstubB; + pyrB->data.ptr = 0; + } + + if( count == 0 ) + return; + + if( !featuresA || !featuresB ) + CV_Error( CV_StsNullPtr, "Some of arrays of point coordinates are missing" ); + + if( count < 0 ) + CV_Error( CV_StsOutOfRange, "The number of tracked points is negative or zero" ); + + if( winSize.width <= 1 || winSize.height <= 1 ) + CV_Error( CV_StsBadSize, "Invalid search window size" ); + + icvInitPyramidalAlgorithm( imgA, imgB, pyrA, pyrB, + level, &criteria, MAX_ITERS, flags, + &imgI, &imgJ, &step, &size, &scale, &pyrBuffer ); + + if( !status ) + { + _status.allocate(count); + status = _status; + } + + memset( status, 1, count ); + if( error ) + memset( error, 0, count*sizeof(error[0]) ); + + if( !(flags & CV_LKFLOW_INITIAL_GUESSES) ) + memcpy( featuresB, featuresA, count*sizeof(featuresA[0])); + + for( i = 0; i < count; i++ ) + { + featuresB[i].x = (float)(featuresB[i].x * scale[level] * 0.5); + featuresB[i].y = (float)(featuresB[i].y * scale[level] * 0.5); + } + + 
/* do processing from top pyramid level (smallest image) + to the bottom (original image) */ + for( l = level; l >= 0; l-- ) + { + CvMat imgI_l, imgJ_l; + cvInitMatHeader(&imgI_l, size[l].height, size[l].width, imgA->type, imgI[l], step[l]); + cvInitMatHeader(&imgJ_l, size[l].height, size[l].width, imgB->type, imgJ[l], step[l]); + + cv::parallel_for(cv::BlockedRange(0, count), + cv::LKTrackerInvoker(&imgI_l, &imgJ_l, featuresA, + featuresB, status, error, + criteria, winSize, l, flags)); } // end of pyramid levels loop (l) } diff --git a/opencv/src/cvaux/cvhog.cpp b/opencv/src/cvaux/cvhog.cpp index 1b8a4c65..a3bc42d3 100644 --- a/opencv/src/cvaux/cvhog.cpp +++ b/opencv/src/cvaux/cvhog.cpp @@ -785,73 +785,84 @@ void HOGDescriptor::detect(const Mat& img, } } - -struct HOGThreadData + +struct HOGInvoker { - vector rectangles; - vector locations; - Mat smallerImgBuf; + HOGInvoker( const HOGDescriptor* _hog, const Mat& _img, + double _hitThreshold, Size _winStride, Size _padding, + const double* _levelScale, ConcurrentRectVector* _vec ) + { + hog = _hog; + img = _img; + hitThreshold = _hitThreshold; + winStride = _winStride; + padding = _padding; + levelScale = _levelScale; + vec = _vec; + } + + void operator()( const BlockedRange& range ) const + { + int i, i1 = range.begin(), i2 = range.end(); + double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? 
levelScale[i1+1] : std::max(img.cols, img.rows); + Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale)); + Mat smallerImgBuf(maxSz, img.type()); + vector locations; + + for( i = i1; i < i2; i++ ) + { + double scale = levelScale[i]; + Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale)); + Mat smallerImg(sz, img.type(), smallerImgBuf.data); + if( sz == img.size() ) + smallerImg = Mat(sz, img.type(), img.data, img.step); + else + resize(img, smallerImg, sz); + hog->detect(smallerImg, locations, hitThreshold, winStride, padding); + Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale)); + for( size_t j = 0; j < locations.size(); j++ ) + vec->push_back(Rect(cvRound(locations[j].x*scale), + cvRound(locations[j].y*scale), + scaledWinSize.width, scaledWinSize.height)); + } + } + + const HOGDescriptor* hog; + Mat img; + double hitThreshold; + Size winStride; + Size padding; + const double* levelScale; + ConcurrentRectVector* vec; }; + void HOGDescriptor::detectMultiScale( const Mat& img, vector& foundLocations, double hitThreshold, Size winStride, Size padding, double scale0, int groupThreshold) const { double scale = 1.; - foundLocations.clear(); - int i, levels = 0; const int maxLevels = 64; - int t, nthreads = getNumThreads(); - vector threadData(nthreads); - - for( t = 0; t < nthreads; t++ ) - threadData[t].smallerImgBuf.create(img.size(), img.type()); - - vector levelScale(maxLevels); - for( levels = 0; levels < maxLevels; levels++ ) + vector levelScale; + for( int levels = 0; levels < maxLevels; levels++ ) { - levelScale[levels] = scale; + levelScale.push_back(scale); if( cvRound(img.cols/scale) < winSize.width || cvRound(img.rows/scale) < winSize.height || scale0 <= 1 ) break; scale *= scale0; } - levels = std::max(levels, 1); - levelScale.resize(levels); - - { -#ifdef _OPENMP - #pragma omp parallel for num_threads(nthreads) schedule(dynamic) -#endif // _OPENMP - for( i = 0; i < levels; i++ ) - { - 
HOGThreadData& tdata = threadData[getThreadNum()]; - double scale = levelScale[i]; - Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale)); - Mat smallerImg(sz, img.type(), tdata.smallerImgBuf.data); - if( sz == img.size() ) - smallerImg = Mat(sz, img.type(), img.data, img.step); - else - resize(img, smallerImg, sz); - detect(smallerImg, tdata.locations, hitThreshold, winStride, padding); - Size scaledWinSize = Size(cvRound(winSize.width*scale), cvRound(winSize.height*scale)); - for( size_t j = 0; j < tdata.locations.size(); j++ ) - tdata.rectangles.push_back(Rect( - cvRound(tdata.locations[j].x*scale), - cvRound(tdata.locations[j].y*scale), - scaledWinSize.width, scaledWinSize.height)); - } - } - for( t = 0; t < nthreads; t++ ) - { - HOGThreadData& tdata = threadData[t]; - std::copy(tdata.rectangles.begin(), tdata.rectangles.end(), - std::back_inserter(foundLocations)); - } + ConcurrentRectVector allCandidates; + + parallel_for(BlockedRange(0, (int)levelScale.size()), + HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates)); + + foundLocations.resize(allCandidates.size()); + std::copy(allCandidates.begin(), allCandidates.end(), foundLocations.begin()); groupRectangles(foundLocations, groupThreshold, 0.2); }