#include "boost.h"
#include "cascadeclassifier.h"
#include <queue>

using namespace std;
using namespace cv;

static inline double logRatio( double val )
{
    const double eps = 1e-5;

    val = max( val, eps );
    val = min( val, 1. - eps );
    return log( val/(1. - val) );
}
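// logRatio(p) is the logit transform log(p/(1-p)) with p clamped to
// [eps, 1-eps]: e.g. logRatio(0.5) == 0 and logRatio(0.995) == log(199) ~= 5.29.
// update_weights() below uses it to turn the Discrete AdaBoost error rate into
// the weak classifier weight C = -logRatio(err).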
#define CV_CMP_FLT(i,j) (i < j)
static CV_IMPLEMENT_QSORT_EX( icvSortFlt, float, CV_CMP_FLT, const float* )

#define CV_CMP_NUM_IDX(i,j) (aux[i] < aux[j])
static CV_IMPLEMENT_QSORT_EX( icvSortIntAux, int, CV_CMP_NUM_IDX, const float* )
static CV_IMPLEMENT_QSORT_EX( icvSortUShAux, unsigned short, CV_CMP_NUM_IDX, const float* )

#define CV_THRESHOLD_EPS (0.00001F)

static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
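// Both constants feed cvCreateMemStorage below: node/split storage is requested
// in blocks of at least 64 KB (MinBlockSize), with 1 KB (BlockSizeDelta) of
// headroom added on top of the computed block size.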
//----------------------------- CascadeBoostParams -------------------------------------------------

CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F ), maxFalseAlarm( 0.5F )
{
    boost_type = CvBoost::GENTLE;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}

CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
void CvCascadeBoostParams::write( FileStorage &fs ) const
{
    String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
    CV_Assert( !boostTypeStr.empty() );
    fs << CC_BOOST_TYPE << boostTypeStr;
    fs << CC_MINHITRATE << minHitRate;
    fs << CC_MAXFALSEALARM << maxFalseAlarm;
    fs << CC_TRIM_RATE << weight_trim_rate;
    fs << CC_MAX_DEPTH << max_depth;
    fs << CC_WEAK_COUNT << weak_count;
}
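// With the defaults this writes, into the stage-params node of the cascade
// file (key and value strings assuming the usual CC_* definitions in
// cascadeclassifier.h):
//   boostType: GAB, minHitRate: 0.995, maxFalseAlarm: 0.5,
//   weightTrimRate: 0.95, maxDepth: 1, maxWeakCount: 100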
bool CvCascadeBoostParams::read( const FileNode &node )
{
    String boostTypeStr;
    FileNode rnode = node[CC_BOOST_TYPE];
    rnode >> boostTypeStr;
    boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
    if (boost_type == -1)
        CV_Error( CV_StsBadArg, "unsupported Boost type" );
    node[CC_MINHITRATE] >> minHitRate;
    node[CC_MAXFALSEALARM] >> maxFalseAlarm;
    node[CC_TRIM_RATE] >> weight_trim_rate;
    node[CC_MAX_DEPTH] >> max_depth;
    node[CC_WEAK_COUNT] >> weak_count;
    if ( minHitRate <= 0 || minHitRate > 1 ||
         maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
         weight_trim_rate <= 0 || weight_trim_rate > 1 ||
         max_depth <= 0 || weak_count <= 0 )
        CV_Error( CV_StsBadArg, "bad parameters range" );
    return true;
}
void CvCascadeBoostParams::printDefaults() const
{
    cout << "--boostParams--" << endl;
    cout << "  [-bt <{" << CC_DISCRETE_BOOST << ", "
                        << CC_REAL_BOOST << ", "
                        << CC_LOGIT_BOOST << ", "
                        << CC_GENTLE_BOOST << "(default)}>]" << endl;
    cout << "  [-minHitRate <min_hit_rate = " << minHitRate << ">]" << endl;
    cout << "  [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
    cout << "  [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
    cout << "  [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
    cout << "  [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
}
void CvCascadeBoostParams::printAttrs() const
{
    String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
    CV_Assert( !boostTypeStr.empty() );
    cout << "boostType: " << boostTypeStr << endl;
    cout << "minHitRate: " << minHitRate << endl;
    cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
    cout << "weightTrimRate: " << weight_trim_rate << endl;
    cout << "maxTreeDepth: " << max_depth << endl;
    cout << "maxWeakCount: " << weak_count << endl;
}
bool CvCascadeBoostParams::scanAttr( const String prmName, const String val )
{
    bool res = true;

    if( !prmName.compare( "-bt" ) )
    {
        boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                     !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                     !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                     !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
        if (boost_type == -1)
            res = false;
    }
    else if( !prmName.compare( "-minHitRate" ) )
    {
        minHitRate = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-maxFalseAlarmRate" ) )
    {
        maxFalseAlarm = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-weightTrimRate" ) )
    {
        weight_trim_rate = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-maxDepth" ) )
    {
        max_depth = atoi( val.c_str() );
    }
    else if( !prmName.compare( "-maxWeakCount" ) )
    {
        weak_count = atoi( val.c_str() );
    }
    else
        res = false;

    return res;
}
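// Typical use from the traincascade argument loop (illustrative values only):
//   params.scanAttr( "-minHitRate", "0.999" );  // true: parameter consumed
//   params.scanAttr( "-numStages", "20" );      // false: not a boost parameter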
//---------------------------- CascadeBoostTrainData -----------------------------

CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  const CvDTreeParams& _params )
{
    is_classifier = true;
    var_all = var_count = (int)_featureEvaluator->getNumFeatures();

    featureEvaluator = _featureEvaluator;

    set_params( _params );
    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;

    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0, max_c_count - 33)/32)*sizeof(int), sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
    split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
}
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  int _numSamples,
                                                  int _precalcValBufSize, int _precalcIdxBufSize,
                                                  const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                       int _numSamples,
                                       int _precalcValBufSize, int _precalcIdxBufSize,
                                       const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    rng = &cv::theRNG();

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    _resp = featureEvaluator->getCls();
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1
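    // (One possible form of that check -- not in the original code -- assuming a
    // continuous single-row response matrix:
    //     for( int si = 0; si < _numSamples; si++ )
    //         CV_Assert( _resp.data.fl[si] == 0.0F || _resp.data.fl[si] == 1.0F );
    // )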
    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0 )
        CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures();
    sample_count = _numSamples;

    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true; // sample indices fit into 16 bits

    numPrecalcVal = min( (_precalcValBufSize*1048576) / int(sizeof(float)*sample_count), var_count );
    numPrecalcIdx = min( (_precalcIdxBufSize*1048576) /
                         int((is_buf_16u ? sizeof(unsigned short) : sizeof(int))*sample_count), var_count );
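    // Example: with _precalcValBufSize = 256 (MB) and sample_count = 20000,
    // numPrecalcVal = min( 256*1048576/(4*20000), var_count )
    //              = min( 3355, var_count ) feature rows cached in valCache.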
    valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1; // +1 for cv_labels
    buf_size = (work_var_count + 1) * sample_count;
    buf_count = 2;

    if ( is_buf_16u )
        buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
    else
        buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
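    // buf layout: one stripe of sample_count entries per work variable --
    // stripes [0, numPrecalcIdx) hold the presorted sample indices of the
    // precalculated features, stripe work_var_count-1 the cv_labels, and
    // stripe work_var_count the sample indices (see get_cv_labels() and
    // get_sample_indices() below).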
    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0, sample_count - 33)/32)*sizeof(int), sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample indices
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + work_var_count*sample_count);
    else
        idst = buf->data.i + work_var_count*sample_count;

    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    cat_count->data.i[cat_var_count] = 2;

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0, max_c_count - 33)/32)*sizeof(int), sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
}
void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}
const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf )
{
    int nodeSampleCount = n->sample_count;
    int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );

    int* sampleIndicesBuf = labelsBuf; // the buffer is safe to reuse: each index is read before the label overwrites it
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
    for( int si = 0; si < nodeSampleCount; si++ )
    {
        int sidx = sampleIndices[si];
        labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
    }
    return labelsBuf;
}
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}

const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
        const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
    int nodeSampleCount = n->sample_count;
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcIdx )
    {
        if( !is_buf_16u )
            *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
        else
        {
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
                                                                         vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];
            *sortedIndices = sortedIndicesBuf;
        }

        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = valCache.at<float>( vi, idx );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx );
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        // use sample_indices as temporary buffer for values
        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                ((float*)sampleIndices)[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                ((float*)sampleIndices)[i] = (*featureEvaluator)( vi, sampleIndices[i] );
            }
        }
        // only the first nodeSampleCount entries are initialized, so sort
        // exactly that many (sorting sample_count entries would read garbage)
        icvSortIntAux( sortedIndicesBuf, nodeSampleCount, (float *)sampleIndices );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = ((float*)sampleIndices)[sortedIndicesBuf[i]];
        *sortedIndices = sortedIndicesBuf;
    }

    *ordValues = ordValuesBuf;
}
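// Summary of the cases above: sorted indices are read from buf when
// vi < numPrecalcIdx and built (then sorted) on the fly otherwise; values are
// read from valCache when vi < numPrecalcVal and recomputed through the
// featureEvaluator otherwise.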
const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
{
    int nodeSampleCount = n->sample_count;
    int* sampleIndicesBuf = catValuesBuf; // the buffer is safe to reuse here as well
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcVal )
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i] );
    }
    else
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
    }
    return catValuesBuf;
}

float CvCascadeBoostTrainData::getVarValue( int vi, int si )
{
    if ( vi < numPrecalcVal && !valCache.empty() )
        return valCache.at<float>( vi, si );
    return (*featureEvaluator)( vi, si );
}
struct FeatureIdxOnlyPrecalc
{
    FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _feval, CvMat* _buf, int _sample_count, bool _is_buf_16u )
    {
        feval = _feval;
        sample_count = _sample_count;
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const BlockedRange& range ) const
    {
        cv::AutoBuffer<float> valCache(sample_count);
        float* valCachePtr = (float*)valCache;
        for ( int fi = range.begin(); fi < range.end(); fi++ )
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCachePtr[si] = (*feval)( fi, si );
                if ( is_buf_16u )
                    *(udst + fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + fi*sample_count + si) = si;
            }
            if ( is_buf_16u )
                icvSortUShAux( udst + fi*sample_count, sample_count, valCachePtr );
            else
                icvSortIntAux( idst + fi*sample_count, sample_count, valCachePtr );
        }
    }
    const CvFeatureEvaluator* feval;
    int sample_count;
    int* idst;
    unsigned short* udst;
    bool is_buf_16u;
};
struct FeatureValAndIdxPrecalc
{
    FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _feval, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
    {
        feval = _feval;
        valCache = _valCache;
        sample_count = _sample_count;
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const BlockedRange& range ) const
    {
        for ( int fi = range.begin(); fi < range.end(); fi++ )
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCache->at<float>(fi,si) = (*feval)( fi, si );
                if ( is_buf_16u )
                    *(udst + fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + fi*sample_count + si) = si;
            }
            if ( is_buf_16u )
                icvSortUShAux( udst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
            else
                icvSortIntAux( idst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
        }
    }
    const CvFeatureEvaluator* feval;
    Mat* valCache;
    int sample_count;
    int* idst;
    unsigned short* udst;
    bool is_buf_16u;
};
struct FeatureValOnlyPrecalc
{
    FeatureValOnlyPrecalc( const CvFeatureEvaluator* _feval, Mat* _valCache, int _sample_count )
    {
        feval = _feval;
        valCache = _valCache;
        sample_count = _sample_count;
    }
    void operator()( const BlockedRange& range ) const
    {
        for ( int fi = range.begin(); fi < range.end(); fi++ )
            for( int si = 0; si < sample_count; si++ )
                valCache->at<float>(fi,si) = (*feval)( fi, si );
    }
    const CvFeatureEvaluator* feval;
    Mat* valCache;
    int sample_count;
};
void CvCascadeBoostTrainData::precalculate()
{
    int minNum = MIN( numPrecalcVal, numPrecalcIdx );
    CV_DbgAssert( !valCache.empty() );

    double proctime = -TIME( 0 );
    parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
                  FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u) );
    parallel_for( BlockedRange(0, minNum),
                  FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u) );
    parallel_for( BlockedRange(minNum, numPrecalcVal),
                  FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
    cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}
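// Ranges covered above: features [0, minNum) get both cached values and cached
// sorted indices, [minNum, numPrecalcVal) cached values only, and
// [numPrecalcVal, numPrecalcIdx) cached sorted indices only; anything beyond is
// recomputed on demand in get_ord_var_data()/get_cat_var_data().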
//-------------------------------- CascadeBoostTree ----------------------------------------

CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
{
    CvDTreeNode* node = root;
    if( !node )
        CV_Error( CV_StsError, "The tree has not been trained yet" );

    if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = val <= split->ord.c ? node->left : node->right;
        }
    }
    else // categorical
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
        }
    }
    return node;
}
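// The returned leaf's value is the weak classifier's response; CvCascadeBoost::predict()
// below accumulates it as: sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value.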
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    queue<CvDTreeNode*> internalNodesQueue;
    int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth );
    std::vector<float> leafVals(size);
    int leafValIdx = 0;
    int internalNodeIdx = 1;
    CvDTreeNode* tempNode;

    CV_DbgAssert( root );
    internalNodesQueue.push( root );

    fs << "{";
    fs << CC_INTERNAL_NODES << "[:";
    while (!internalNodesQueue.empty())
    {
        tempNode = internalNodesQueue.front();
        CV_Assert( tempNode->left );
        if ( !tempNode->left->left && !tempNode->left->right ) // left node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->left );
            fs << internalNodeIdx++;
        }
        CV_Assert( tempNode->right );
        if ( !tempNode->right->left && !tempNode->right->right ) // right node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->right );
            fs << internalNodeIdx++;
        }
        int fidx = tempNode->split->var_idx;
        fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
        fs << fidx;
        if ( !maxCatCount )
            fs << tempNode->split->ord.c;
        else
            for( int i = 0; i < subsetN; i++ )
                fs << tempNode->split->subset[i];
        internalNodesQueue.pop();
    }
    fs << "]"; // CC_INTERNAL_NODES

    fs << CC_LEAF_VALUES << "[:";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafVals[ni];
    fs << "]"; // CC_LEAF_VALUES
    fs << "}";
}
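// Resulting storage layout (consumed by read() below): internalNodes holds one
// record per internal node, [leftIdx, rightIdx, featureIdx, threshold] for
// ordered features or [leftIdx, rightIdx, featureIdx, subset x subsetN] for
// categorical ones. A positive child index points to another internal node in
// this list; a non-positive index -k addresses leafValues[k].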
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
                               CvDTreeTrainData* _data )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    int step = 3 + ( maxCatCount > 0 ? subsetN : 1 );

    queue<CvDTreeNode*> internalNodesQueue;
    FileNodeIterator internalNodesIt, leafValuesIt;
    CvDTreeNode* prntNode, *cldNode;

    clear();
    data = _data;
    ensemble = _ensemble;
    pruned_tree_idx = 0;

    // read tree nodes, back to front: the record written last is read first
    FileNode rnode = node[CC_INTERNAL_NODES];
    internalNodesIt = rnode.end();
    leafValuesIt = node[CC_LEAF_VALUES].end();
    internalNodesIt--; leafValuesIt--;
    for( size_t i = 0; i < rnode.size()/step; i++ )
    {
        prntNode = data->new_node( 0, 0, 0, 0 );
        if ( maxCatCount > 0 ) // categorical
        {
            prntNode->split = data->new_split_cat( 0, 0 );
            for( int j = subsetN-1; j >= 0; j-- )
            {
                *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
            }
        }
        else // ordered
        {
            float split_value;
            *internalNodesIt >> split_value; internalNodesIt--;
            prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0 );
        }
        *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
        int ridx, lidx;
        *internalNodesIt >> ridx; internalNodesIt--;
        *internalNodesIt >> lidx; internalNodesIt--;
        if ( ridx <= 0 )
        {
            prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValuesIt >> cldNode->value; leafValuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->right = internalNodesQueue.front();
            prntNode->right->parent = prntNode;
            internalNodesQueue.pop();
        }

        if ( lidx <= 0 )
        {
            prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValuesIt >> cldNode->value; leafValuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->left = internalNodesQueue.front();
            prntNode->left->parent = prntNode;
            internalNodesQueue.pop();
        }

        internalNodesQueue.push( prntNode );
    }

    root = internalNodesQueue.front();
    internalNodesQueue.pop();
}
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
{
    int n = node->sample_count, nl, nr, scount = data->sample_count;
    char* dir = (char*)data->direction->data.ptr;
    CvDTreeNode *left = 0, *right = 0;
    int* newIdx = data->split_buf->data.i;
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int) + sizeof(float)));
    int* tempBuf = (int*)(uchar*)inn_buf;
    bool splitInputData;

    complete_node_dir(node);

    for( int i = nl = nr = 0; i < n; i++ )
    {
        int d = dir[i];
        // initialize new indices for splitting ordered variables
        newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
        nr += d;
        nl += 1-d;
    }
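    // Branch-free select above: d is 0 (left) or 1 (right). For d == 0, d-1 is
    // all ones and -d is zero, so newIdx[i] = nl; for d == 1, d-1 is zero and
    // -d is all ones, so newIdx[i] = nr. Each sample therefore receives its
    // future position inside its child's still-sorted index stripes.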
    node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
    node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );

    splitInputData = node->depth + 1 < data->params.max_depth &&
        (node->left->sample_count > data->params.min_sample_count ||
         node->right->sample_count > data->params.min_sample_count);
    // split ordered variables, keep both halves sorted.
    for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
    {
        int ci = data->get_var_type(vi);
        if( ci >= 0 || !splitInputData )
            continue;

        int n1 = node->get_num_valid(vi);
        float* src_val_buf = (float*)(tempBuf + n);
        int* src_sorted_idx_buf = (int*)(src_val_buf + n);
        int* src_sample_idx_buf = src_sorted_idx_buf + n;
        const int* src_sorted_idx = 0;
        const float* src_val = 0;
        data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);

        for(int i = 0; i < n; i++)
            tempBuf[i] = src_sorted_idx[i];

        if (data->is_buf_16u)
        {
            unsigned short *ldst, *rdst, *ldst0, *rdst0;
            ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
                vi*scount + left->offset);
            rdst0 = rdst = (unsigned short*)(ldst + nl);

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                    *rdst++ = (unsigned short)idx;
                else
                    *ldst++ = (unsigned short)idx;
            }
            left->set_num_valid(vi, (int)(ldst - ldst0));
            right->set_num_valid(vi, (int)(rdst - rdst0));
        }
        else
        {
            int *ldst0, *ldst, *rdst0, *rdst;
            ldst0 = ldst = buf->data.i + left->buf_idx*buf->cols +
                vi*scount + left->offset;
            rdst0 = rdst = buf->data.i + right->buf_idx*buf->cols +
                vi*scount + right->offset;

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                    *rdst++ = idx;
                else
                    *ldst++ = idx;
            }
            left->set_num_valid(vi, (int)(ldst - ldst0));
            right->set_num_valid(vi, (int)(rdst - rdst0));
        }
    }
    // split cv_labels using newIdx relocation table
    int* src_lbls_buf = tempBuf + n;
    const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = src_lbls[i];

    if (data->is_buf_16u)
    {
        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
            (workVarCount-1)*scount + left->offset);
        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
            (workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
                *rdst++ = (unsigned short)idx;
            else
                *ldst++ = (unsigned short)idx;
        }
    }
    else
    {
        int *ldst = buf->data.i + left->buf_idx*buf->cols +
            (workVarCount-1)*scount + left->offset;
        int *rdst = buf->data.i + right->buf_idx*buf->cols +
            (workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
                *rdst++ = idx;
            else
                *ldst++ = idx;
        }
    }
    for( int vi = 0; vi < data->var_count; vi++ )
    {
        left->set_num_valid(vi, (int)(nl));
        right->set_num_valid(vi, (int)(nr));
    }

    // split sample indices
    int *sampleIdx_src_buf = tempBuf + n;
    const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = sampleIdx_src[i];

    if (data->is_buf_16u)
    {
        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
            workVarCount*scount + left->offset);
        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
            workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
            unsigned short idx = (unsigned short)tempBuf[i];
            if (dir[i])
                *rdst++ = idx;
            else
                *ldst++ = idx;
        }
    }
    else
    {
        int* ldst = buf->data.i + left->buf_idx*buf->cols +
            workVarCount*scount + left->offset;
        int* rdst = buf->data.i + right->buf_idx*buf->cols +
            workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
            int idx = tempBuf[i];
            if (dir[i])
                *rdst++ = idx;
            else
                *ldst++ = idx;
        }
    }

    // deallocate the parent node data that is not needed anymore
    data->free_node_data(node);
}
void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap )
{
    if ( node && node->split )
    {
        featureMap.ptr<int>(0)[node->split->var_idx] = 1;
        auxMarkFeaturesInMap( node->left, featureMap );
        auxMarkFeaturesInMap( node->right, featureMap );
    }
}

void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
{
    auxMarkFeaturesInMap( root, featureMap );
}
//----------------------------------- CascadeBoost --------------------------------------

bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                            int _numSamples,
                            int _precalcValBufSize, int _precalcIdxBufSize,
                            const CvCascadeBoostParams& _params )
{
    CV_Assert( !data );
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    storage = 0;

    set_params( _params );
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    update_weights( 0 );

    cout << "+----+---------+---------+" << endl;
    cout << "|  N |    HR   |    FA   |" << endl;
    cout << "+----+---------+---------+" << endl;

    do
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            // TODO: may be should finish the loop (!!!)
            delete tree;
            continue;
        }
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
    }
    while( !isErrDesired() && (weak->total < params.weak_count) );

    data->is_classifier = true;
    data->free_train_data();
    return true;
}
float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
{
    CV_Assert( weak );
    double sum = 0;
    CvSeqReader reader;
    cvStartReadSeq( weak, &reader );
    cvSetSeqReaderPos( &reader, 0 );
    for( int i = 0; i < weak->total; i++ )
    {
        CvBoostTree* wtree;
        CV_READ_SEQ_ELEM( wtree, reader );
        sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
    }
    if( !returnSum )
        sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
    return (float)sum;
}
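// With returnSum == false the stage votes: a sample passes (1.0) iff the sum of
// weak responses reaches the stage threshold selected by isErrDesired() below;
// CV_THRESHOLD_EPS absorbs floating-point rounding in the comparison.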
bool CvCascadeBoost::set_params( const CvBoostParams& _params )
{
    minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
    maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
    return ( ( minHitRate > 0 ) && ( minHitRate < 1 ) &&
             ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1 ) &&
             CvBoost::set_params( _params ) );
}
void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    int n = data->sample_count;
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = (uchar*)inn_buf;
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double w0 = 1./n;
        double p[2] = { 1, 1 };

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() some are deactivated/reactivated again, if needed
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }
        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))
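            //
            // Worked example: err = 0.25 gives C = log(0.75/0.25) = log(3) ~= 1.1,
            // so each misclassified sample has its weight multiplied by
            // exp(C) = 3 (scale[1]) while correct ones keep theirs (scale[0] = 1).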
            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*
                           scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i) = P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))
            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response[i] = F(x_i)
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses
            const double lbWeightThresh = FLT_EPSILON;
            const double lbZMax = 10.;

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }
    // renormalize weights
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}
bool CvCascadeBoost::isErrDesired()
{
    int sCount = data->sample_count,
        numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
    float* eval = (float*) cvStackAlloc( sizeof(eval[0]) * sCount );

    for( int i = 0; i < sCount; i++ )
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
            eval[numPos++] = predict( i, true );
    icvSortFlt( eval, numPos, 0 );
    int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
    threshold = eval[ thresholdIdx ];
    numPosTrue = numPos - thresholdIdx;
    for( int i = thresholdIdx - 1; i >= 0; i-- )
        if ( abs( eval[i] - threshold ) < FLT_EPSILON )
            numPosTrue++;
    float hitRate = ((float) numPosTrue) / ((float) numPos);

    for( int i = 0; i < sCount; i++ )
    {
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
        {
            numNeg++;
            if( predict( i ) )
                numFalse++;
        }
    }
    float falseAlarm = ((float) numFalse) / ((float) numNeg);

    cout << "|"; cout.width(4); cout << right << weak->total;
    cout << "|"; cout.width(9); cout << right << hitRate;
    cout << "|"; cout.width(9); cout << right << falseAlarm;
    cout << "|" << endl;
    cout << "+----+---------+---------+" << endl;

    return falseAlarm <= maxFalseAlarm;
}
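// Example of the threshold selection in isErrDesired(): with minHitRate = 0.995
// and numPos = 1000 positives, thresholdIdx = (int)(0.005f*1000) = 5, so the
// stage threshold becomes the 6th smallest positive response; at most 5
// positives (0.5%) fall below it, giving hitRate >= 0.995 on the training set.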
void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
{
    CvCascadeBoostTree* weakTree;
    fs << CC_WEAK_COUNT << weak->total;
    fs << CC_STAGE_THRESHOLD << threshold;
    fs << CC_WEAK_CLASSIFIERS << "[";
    for( int wi = 0; wi < weak->total; wi++ )
    {
        /*sprintf( cmnt, "tree %i", wi );
        cvWriteComment( fs, cmnt, 0 );*/
        weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->write( fs, featureMap );
    }
    fs << "]";
}
bool CvCascadeBoost::read( const FileNode &node,
                           const CvFeatureEvaluator* _featureEvaluator,
                           const CvCascadeBoostParams& _params )
{
    CvMemStorage* storage;
    clear();

    data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
    set_params( _params );

    node[CC_STAGE_THRESHOLD] >> threshold;
    FileNode rnode = node[CC_WEAK_CLASSIFIERS];

    storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree();
        tree->read( *it, this, data );
        cvSeqPush( weak, &tree );
    }
    return true;
}
void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
{
    for( int wi = 0; wi < weak->total; wi++ )
    {
        CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->markFeaturesInMap( featureMap );
    }
}