#include "boost.h"
#include "cascadeclassifier.h"
#include <queue>

using namespace std;

static inline double
logRatio( double val )
{
    const double eps = 1e-5;

    val = max( val, eps );
    val = min( val, 1. - eps );
    return log( val/(1. - val) );
}
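// logRatio maps a probability in (0,1) to its log-odds; the clamping above
// keeps the result finite at the extremes. For example, logRatio(0.5) == 0 and
// logRatio(0.9) == log(9) ~ 2.197, while logRatio(1.0) is clamped to
// log((1-1e-5)/1e-5) ~ 11.5 instead of +inf.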
#define CV_CMP_FLT(i,j) (i < j)
static CV_IMPLEMENT_QSORT_EX( icvSortFlt, float, CV_CMP_FLT, const float* )

#define CV_CMP_NUM_IDX(i,j) (aux[i] < aux[j])
static CV_IMPLEMENT_QSORT_EX( icvSortIntAux, int, CV_CMP_NUM_IDX, const float* )
static CV_IMPLEMENT_QSORT_EX( icvSortUShAux, unsigned short, CV_CMP_NUM_IDX, const float* )

#define CV_THRESHOLD_EPS (0.00001F)

static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
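// MinBlockSize/BlockSizeDelta size the cvMemStorage blocks allocated below:
// tree storages are rounded up to at least 64KB (1<<16) blocks, grown in
// 1KB (1<<10) increments, so node/split allocation rarely hits the system
// allocator.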
//----------------------------- CascadeBoostParams -------------------------------------------------

CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F ), maxFalseAlarm( 0.5F )
{
    boost_type = CvBoost::GENTLE;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}

CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
void CvCascadeBoostParams::write( FileStorage &fs ) const
{
    String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
    CV_Assert( !boostTypeStr.empty() );
    fs << CC_BOOST_TYPE << boostTypeStr;
    fs << CC_MINHITRATE << minHitRate;
    fs << CC_MAXFALSEALARM << maxFalseAlarm;
    fs << CC_TRIM_RATE << weight_trim_rate;
    fs << CC_MAX_DEPTH << max_depth;
    fs << CC_WEAK_COUNT << weak_count;
}
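/* With the default parameters, and assuming the usual string values of the
   CC_* keys from the traincascade headers, the emitted params section looks
   roughly like:

     boostType: GAB
     minHitRate: 0.995
     maxFalseAlarmRate: 0.5
     weightTrimRate: 0.95
     maxDepth: 1
     maxWeakCount: 100

   (FileStorage controls the actual numeric formatting.) */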
bool CvCascadeBoostParams::read( const FileNode &node )
{
    String boostTypeStr;
    FileNode rnode = node[CC_BOOST_TYPE];
    rnode >> boostTypeStr;
    boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
    if ( boost_type == -1 )
        CV_Error( CV_StsBadArg, "unsupported Boost type" );
    node[CC_MINHITRATE] >> minHitRate;
    node[CC_MAXFALSEALARM] >> maxFalseAlarm;
    node[CC_TRIM_RATE] >> weight_trim_rate;
    node[CC_MAX_DEPTH] >> max_depth;
    node[CC_WEAK_COUNT] >> weak_count;
    if ( minHitRate <= 0 || minHitRate > 1 ||
         maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
         weight_trim_rate <= 0 || weight_trim_rate > 1 ||
         max_depth <= 0 || weak_count <= 0 )
        CV_Error( CV_StsBadArg, "bad parameters range" );
    return true;
}
void CvCascadeBoostParams::printDefaults() const
{
    cout << "--boostParams--" << endl;
    cout << "  [-bt <{" << CC_DISCRETE_BOOST << ", "
                        << CC_REAL_BOOST << ", "
                        << CC_LOGIT_BOOST << ", "
                        << CC_GENTLE_BOOST << "(default)}>]" << endl;
    cout << "  [-minHitRate <min_hit_rate = " << minHitRate << ">]" << endl;
    cout << "  [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
    cout << "  [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
    cout << "  [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
    cout << "  [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
}
void CvCascadeBoostParams::printAttrs() const
{
    String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
    CV_Assert( !boostTypeStr.empty() );
    cout << "boostType: " << boostTypeStr << endl;
    cout << "minHitRate: " << minHitRate << endl;
    cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
    cout << "weightTrimRate: " << weight_trim_rate << endl;
    cout << "maxTreeDepth: " << max_depth << endl;
    cout << "maxWeakCount: " << weak_count << endl;
}
bool CvCascadeBoostParams::scanAttr( const String prmName, const String val )
{
    bool res = true;

    if( !prmName.compare( "-bt" ) )
    {
        boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                     !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                     !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                     !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
        if (boost_type == -1)
            res = false;
    }
    else if( !prmName.compare( "-minHitRate" ) )
        minHitRate = (float) atof( val.c_str() );
    else if( !prmName.compare( "-maxFalseAlarmRate" ) )
        maxFalseAlarm = (float) atof( val.c_str() );
    else if( !prmName.compare( "-weightTrimRate" ) )
        weight_trim_rate = (float) atof( val.c_str() );
    else if( !prmName.compare( "-maxDepth" ) )
        max_depth = atoi( val.c_str() );
    else if( !prmName.compare( "-maxWeakCount" ) )
        weak_count = atoi( val.c_str() );
    else
        res = false;

    return res;
}
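/* A minimal usage sketch (hypothetical driver code; the real caller lives in
   the traincascade application):

     CvCascadeBoostParams params;
     bool ok = params.scanAttr( "-minHitRate", "0.999" ); // ok == true
     ok = params.scanAttr( "-unknownFlag", "1" );         // ok == false

   A false return lets the caller report the unrecognized argument. */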
//---------------------------- CascadeBoostTrainData -----------------------------

CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  const CvDTreeParams& _params )
{
    is_classifier = true;
    var_all = var_count = (int)_featureEvaluator->getNumFeatures();

    featureEvaluator = _featureEvaluator;
    shared = true;
    set_params( _params );
    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;

    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0, max_c_count - 33)/32)*sizeof(int), sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
    split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
}
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  int _numSamples,
                                                  int _precalcValBufSize, int _precalcIdxBufSize,
                                                  const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}

void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                       int _numSamples,
                                       int _precalcValBufSize, int _precalcIdxBufSize,
                                       const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    _resp = featureEvaluator->getCls();
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1

    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0 )
        CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures();
    sample_count = _numSamples;

    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true;

    numPrecalcVal = min( (_precalcValBufSize*1048576) / int(sizeof(float)*sample_count), var_count );
    numPrecalcIdx = min( (_precalcIdxBufSize*1048576) /
                         int((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count), var_count );
    int maxNumThreads = 1;
#ifdef _OPENMP
    maxNumThreads = omp_get_num_procs();
#endif
    valCache.create( max(numPrecalcVal, maxNumThreads), sample_count, CV_32FC1 );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1; // +1 for cv_labels
    buf_size = (work_var_count + 1) * sample_count;
    buf_count = 2;

    if ( is_buf_16u )
        buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
    else
        buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );

    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
    pred_float_buf.resize(maxNumThreads);
    pred_int_buf.resize(maxNumThreads);
    resp_float_buf.resize(maxNumThreads);
    resp_int_buf.resize(maxNumThreads);
    cv_lables_buf.resize(maxNumThreads);
    sample_idx_buf.resize(maxNumThreads);
    for( int ti = 0; ti < maxNumThreads; ti++ )
    {
        pred_float_buf[ti].resize(sample_count);
        pred_int_buf[ti].resize(sample_count);
        resp_float_buf[ti].resize(sample_count);
        resp_int_buf[ti].resize(sample_count);
        cv_lables_buf[ti].resize(sample_count);
        sample_idx_buf[ti].resize(sample_count);
    }

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
                               (MAX(0, sample_count - 33)/32)*sizeof(int), sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample labels
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + work_var_count*sample_count);
    else
        idst = buf->data.i + work_var_count*sample_count;

    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    cat_count->data.i[cat_var_count] = 2;

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
                           (MAX(0, max_c_count - 33)/32)*sizeof(int), sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
}
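// Resulting layout of each row of 'buf': work_var_count + 1 blocks of
// sample_count entries each -- pre-sorted sample indices for the first
// numPrecalcIdx ordered features (none when features are categorical), then
// one block of cv_labels, then one block of sample indices. Two rows
// (buf_count == 2) let parent and child nodes alternate buffers while
// splitting.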
void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}

void CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf, const int** labels )
{
    int nodeSampleCount = n->sample_count;
    int* sampleIndicesBuf = get_sample_idx_buf();
    const int* sampleIndices = 0;
    int rStep = CV_IS_MAT_CONT( responses->type ) ?
                1 : responses->step / CV_ELEM_SIZE( responses->type );
    get_sample_indices(n, sampleIndicesBuf, &sampleIndices);

    for( int si = 0; si < nodeSampleCount; si++ )
    {
        int sidx = sampleIndices[si];
        labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
    }
    *labels = labelsBuf;
}

void CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf, const int** indices )
{
    CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf, indices );
}

void CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf, const int** labels )
{
    CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf, labels );
}
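// get_ord_var_data() below serves an ordered feature in one of three regimes:
// for vi < numPrecalcIdx the pre-sorted sample order is read straight from
// 'buf'; if additionally vi < numPrecalcVal the values come from valCache;
// for everything else the feature is evaluated on the fly through
// featureEvaluator and sorted per call.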
int CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* indicesBuf,
                                               const float** ordValues, const int** indices )
{
    int nodeSampleCount = n->sample_count;
    int* sampleIndicesBuf = get_sample_idx_buf();
    const int* sampleIndices = 0;
    get_sample_indices(n, sampleIndicesBuf, &sampleIndices);

    if ( vi < numPrecalcIdx )
    {
        if( !is_buf_16u )
            *indices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
        else
        {
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
                                                                         vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                indicesBuf[i] = shortIndices[i];
            *indices = indicesBuf;
        }
        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*indices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = valCache.at<float>( vi, idx);
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*indices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        // use sample_indices as temporary buffer for values
        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                indicesBuf[i] = i;
                ((float*)sampleIndices)[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                indicesBuf[i] = i;
                ((float*)sampleIndices)[i] = (*featureEvaluator)( vi, sampleIndices[i]);
            }
        }
        // sort the node-local indices by the just-computed feature values
        icvSortIntAux( indicesBuf, nodeSampleCount, (float *)sampleIndices );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = ((float*)sampleIndices)[indicesBuf[i]];
        *indices = indicesBuf;
    }
    *ordValues = ordValuesBuf;
    return 0;
}
int CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf, const int** catValues )
{
    int nodeSampleCount = n->sample_count;
    int* sampleIndicesBuf = get_sample_idx_buf();
    const int* sampleIndices = 0;
    get_sample_indices(n, sampleIndicesBuf, &sampleIndices);

    if ( vi < numPrecalcVal )
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
    }
    else
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
    }
    *catValues = catValuesBuf;
    return 0;
}

float CvCascadeBoostTrainData::getVarValue( int vi, int si )
{
    if ( vi < numPrecalcVal && !valCache.empty() )
        return valCache.at<float>( vi, si );
    return (*featureEvaluator)( vi, si );
}
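// precalculate() fills valCache with the values of the first numPrecalcVal
// features and writes pre-sorted sample index tables into 'buf' for the first
// numPrecalcIdx features; when built with OpenMP the per-feature loops are
// spread across omp_get_num_procs() threads.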
void CvCascadeBoostTrainData::precalculate()
{
    int minNum = MIN( numPrecalcVal, numPrecalcIdx );
    unsigned short* udst = (unsigned short*)buf->data.s;
    int* idst = buf->data.i;
    CV_DbgAssert( !valCache.empty() );
    double proctime = -TIME( 0 );

#ifdef _OPENMP
    int maxNumThreads = omp_get_num_procs();
#pragma omp parallel for num_threads(maxNumThreads) schedule(dynamic)
#endif
    for ( int fi = numPrecalcVal; fi < numPrecalcIdx; fi++)
    {
        int threadIdx = 0;
#ifdef _OPENMP
        threadIdx = omp_get_thread_num();
#endif
        for( int si = 0; si < sample_count; si++ )
        {
            valCache.ptr<float>(threadIdx)[si] = (*featureEvaluator)( fi, si );
            if ( is_buf_16u )
                *(udst + fi*sample_count + si) = (unsigned short)si;
            else
                *(idst + fi*sample_count + si) = si;
        }
        if ( is_buf_16u )
            icvSortUShAux( udst + fi*sample_count, sample_count, valCache.ptr<float>(threadIdx) );
        else
            icvSortIntAux( idst + fi*sample_count, sample_count, valCache.ptr<float>(threadIdx) );
    }
#ifdef _OPENMP
#pragma omp parallel for num_threads(maxNumThreads) schedule(dynamic)
#endif
    for ( int fi = 0; fi < minNum; fi++)
    {
        for( int si = 0; si < sample_count; si++ )
        {
            valCache.ptr<float>(fi)[si] = (*featureEvaluator)( fi, si );
            if ( is_buf_16u )
                *(udst + fi*sample_count + si) = (unsigned short)si;
            else
                *(idst + fi*sample_count + si) = si;
        }
        if ( is_buf_16u )
            icvSortUShAux( udst + fi*sample_count, sample_count, valCache.ptr<float>(fi) );
        else
            icvSortIntAux( idst + fi*sample_count, sample_count, valCache.ptr<float>(fi) );
    }
#ifdef _OPENMP
#pragma omp parallel for num_threads(maxNumThreads) schedule(dynamic)
#endif
    for ( int fi = minNum; fi < numPrecalcVal; fi++)
        for( int si = 0; si < sample_count; si++ )
            valCache.ptr<float>(fi)[si] = (*featureEvaluator)( fi, si );

    cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}
//-------------------------------- CascadeBoostTree ----------------------------------------
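// Prediction of a single weak tree: walk from the root, evaluating the split
// feature for sampleIdx at each internal node (through the value cache when
// available) and descending left or right until a leaf is reached; the leaf
// holds the weak response.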
CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
{
    CvDTreeNode* node = root;
    if( !node )
        CV_Error( CV_StsError, "The tree has not been trained yet" );

    if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = val <= split->ord.c ? node->left : node->right;
        }
    }
    else // categorical
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
        }
    }
    return node;
}
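// Serialization format: nodes are written breadth-first; internal nodes are
// numbered 1, 2, ... in queue order while leaves get non-positive indices
// 0, -1, -2, ..., so a child entry <= 0 in internalNodes points at position
// -index of the leafValues list.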
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    queue<CvDTreeNode*> internalNodesQueue;
    int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
    Ptr<float> leafVals = new float[size];
    int leafValIdx = 0;
    int internalNodeIdx = 1;
    CvDTreeNode* tempNode;

    CV_DbgAssert( root );
    internalNodesQueue.push( root );

    fs << "{";
    fs << CC_INTERNAL_NODES << "[:";
    while (!internalNodesQueue.empty())
    {
        tempNode = internalNodesQueue.front();
        CV_Assert( tempNode->left );
        if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->left );
            fs << internalNodeIdx++;
        }
        CV_Assert( tempNode->right );
        if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->right );
            fs << internalNodeIdx++;
        }
        int fidx = tempNode->split->var_idx;
        fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
        fs << fidx;
        if ( !maxCatCount )
            fs << tempNode->split->ord.c;
        else
            for( int i = 0; i < subsetN; i++ )
                fs << tempNode->split->subset[i];
        internalNodesQueue.pop();
    }
    fs << "]"; // CC_INTERNAL_NODES

    fs << CC_LEAF_VALUES << "[:";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafVals[ni];
    fs << "]"; // CC_LEAF_VALUES
    fs << "}";
}
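// read() reconstructs the tree by consuming the stored arrays backwards:
// because write() emitted nodes breadth-first, iterating both lists from the
// end guarantees that a parent's children have already been rebuilt and can
// be popped off the queue.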
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
                               CvDTreeTrainData* _data )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    int step = 3 + ( maxCatCount>0 ? subsetN : 1 );

    queue<CvDTreeNode*> internalNodesQueue;
    FileNodeIterator internalNodesIt, leafValsuesIt;
    CvDTreeNode* prntNode, *cldNode;

    clear();
    data = _data;
    ensemble = _ensemble;
    pruned_tree_idx = 0;

    // read tree nodes
    FileNode rnode = node[CC_INTERNAL_NODES];
    internalNodesIt = rnode.end();
    leafValsuesIt = node[CC_LEAF_VALUES].end();
    internalNodesIt--; leafValsuesIt--;
    for( size_t i = 0; i < rnode.size()/step; i++ )
    {
        prntNode = data->new_node( 0, 0, 0, 0 );
        if ( maxCatCount > 0 ) // categorical
        {
            prntNode->split = data->new_split_cat( 0, 0 );
            for( int j = subsetN-1; j>=0; j--)
            {
                *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
            }
        }
        else // ordered
        {
            float split_value;
            *internalNodesIt >> split_value; internalNodesIt--;
            prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
        }
        *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
        int ridx, lidx;
        *internalNodesIt >> ridx; internalNodesIt--;
        *internalNodesIt >> lidx; internalNodesIt--;
        if ( ridx <= 0 )
        {
            prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValsuesIt >> cldNode->value; leafValsuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->right = internalNodesQueue.front();
            prntNode->right->parent = prntNode;
            internalNodesQueue.pop();
        }

        if ( lidx <= 0 )
        {
            prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValsuesIt >> cldNode->value; leafValsuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->left = internalNodesQueue.front();
            prntNode->left->parent = prntNode;
            internalNodesQueue.pop();
        }

        internalNodesQueue.push( prntNode );
    }

    root = internalNodesQueue.front();
    internalNodesQueue.pop();
}
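// split_node_data() distributes the parent's pre-sorted feature orders,
// cv_labels and sample indices between the two children in a single pass;
// the newIdx relocation table maps an old within-node position to the new
// position in the left or right child, so each half of a sorted feature
// stays sorted without re-sorting.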
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
{
    int n = node->sample_count, nl, nr, scount = data->sample_count;
    char* dir = (char*)data->direction->data.ptr;
    CvDTreeNode *left = 0, *right = 0;
    int* newIdx = data->split_buf->data.i;
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
    int* tempBuf = (int*)cvStackAlloc(n*sizeof(tempBuf[0]));
    bool splitInputData;

    complete_node_dir(node);

    for( int i = nl = nr = 0; i < n; i++ )
    {
        int d = dir[i];
        // initialize new indices for splitting ordered variables
        newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
        nr += d;
        nl += d^1;
    }

    node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
    node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );

    splitInputData = node->depth + 1 < data->params.max_depth &&
        (node->left->sample_count > data->params.min_sample_count ||
         node->right->sample_count > data->params.min_sample_count);

    // split ordered variables, keep both halves sorted.
    for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
    {
        int ci = data->get_var_type(vi);
        int n1 = node->get_num_valid(vi);
        int *src_idx_buf = data->get_pred_int_buf();
        const int* src_idx = 0;
        float *src_val_buf = data->get_pred_float_buf();
        const float* src_val = 0;

        if( ci >= 0 || !splitInputData )
            continue;

        data->get_ord_var_data(node, vi, src_val_buf, src_idx_buf, &src_val, &src_idx);

        for(int i = 0; i < n; i++)
            tempBuf[i] = src_idx[i];

        if (data->is_buf_16u)
        {
            unsigned short *ldst, *rdst, *ldst0, *rdst0;
            ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
                vi*scount + left->offset);
            rdst0 = rdst = (unsigned short*)(ldst + nl);

            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = (unsigned short)idx;
                    rdst++;
                }
                else
                {
                    *ldst = (unsigned short)idx;
                    ldst++;
                }
            }

            left->set_num_valid(vi, (int)(ldst - ldst0));
            right->set_num_valid(vi, (int)(rdst - rdst0));
        }
        else
        {
            int *ldst0, *ldst, *rdst0, *rdst;
            ldst0 = ldst = buf->data.i + left->buf_idx*buf->cols +
                vi*scount + left->offset;
            rdst0 = rdst = buf->data.i + right->buf_idx*buf->cols +
                vi*scount + right->offset;

            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                {
                    *rdst = idx;
                    rdst++;
                }
                else
                {
                    *ldst = idx;
                    ldst++;
                }
            }

            left->set_num_valid(vi, (int)(ldst - ldst0));
            right->set_num_valid(vi, (int)(rdst - rdst0));
        }
    }

    // split cv_labels using newIdx relocation table
    int *src_lbls_buf = data->get_pred_int_buf();
    const int* src_lbls = 0;
    data->get_cv_labels(node, src_lbls_buf, &src_lbls);

    for(int i = 0; i < n; i++)
        tempBuf[i] = src_lbls[i];

    if (data->is_buf_16u)
    {
        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
            (workVarCount-1)*scount + left->offset);
        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
            (workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = (unsigned short)idx;
                rdst++;
            }
            else
            {
                *ldst = (unsigned short)idx;
                ldst++;
            }
        }
    }
    else
    {
        int *ldst = buf->data.i + left->buf_idx*buf->cols +
            (workVarCount-1)*scount + left->offset;
        int *rdst = buf->data.i + right->buf_idx*buf->cols +
            (workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }
    for( int vi = 0; vi < data->var_count; vi++ )
    {
        left->set_num_valid(vi, (int)(nl));
        right->set_num_valid(vi, (int)(nr));
    }

    // split sample indices
    int *sampleIdx_src_buf = data->get_sample_idx_buf();
    const int* sampleIdx_src = 0;
    data->get_sample_indices(node, sampleIdx_src_buf, &sampleIdx_src);

    for(int i = 0; i < n; i++)
        tempBuf[i] = sampleIdx_src[i];

    if (data->is_buf_16u)
    {
        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
            workVarCount*scount + left->offset);
        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
            workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
            unsigned short idx = (unsigned short)tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }
    else
    {
        int* ldst = buf->data.i + left->buf_idx*buf->cols +
            workVarCount*scount + left->offset;
        int* rdst = buf->data.i + right->buf_idx*buf->cols +
            workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
            int idx = tempBuf[i];
            if (dir[i])
            {
                *rdst = idx;
                rdst++;
            }
            else
            {
                *ldst = idx;
                ldst++;
            }
        }
    }

    // deallocate the parent node data that is not needed anymore
    data->free_node_data(node);
}
void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
{
    if ( node && node->split )
    {
        featureMap.ptr<int>(0)[node->split->var_idx] = 1;
        auxMarkFeaturesInMap( node->left, featureMap );
        auxMarkFeaturesInMap( node->right, featureMap );
    }
}

void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
{
    auxMarkFeaturesInMap( root, featureMap );
}
//----------------------------------- CascadeBoost --------------------------------------

bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                            int _numSamples,
                            int _precalcValBufSize, int _precalcIdxBufSize,
                            const CvCascadeBoostParams& _params )
{
    CV_Assert( !data );
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    storage = 0;

    set_params( _params );
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    update_weights( 0 );

    cout << "+----+---------+---------+" << endl;
    cout << "|  N |    HR   |    FA   |" << endl;
    cout << "+----+---------+---------+" << endl;

    do
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            // TODO: may be should finish the loop (!!!)
            delete tree;
            continue;
        }
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
    }
    while( !isErrDesired() && (weak->total < params.weak_count) );

    data->is_classifier = true;
    data->free_train_data();
    return true;
}
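/* A minimal usage sketch (hypothetical; in practice cascadeclassifier.cpp
   drives this, and featureEvaluator/sample counts below are assumptions):

     CvCascadeBoostParams params;  // GAB, minHitRate 0.995, maxFalseAlarm 0.5
     CvCascadeBoost stageBoost;
     stageBoost.train( featureEvaluator, numPos + numNeg,
                       precalcValBufMb, precalcIdxBufMb, params );

   Weak trees are appended until isErrDesired() reports that the stage
   false-alarm target is met or params.weak_count is exhausted. */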
float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
{
    CV_Assert( weak );
    double sum = 0;
    CvSeqReader reader;
    cvStartReadSeq( weak, &reader );
    cvSetSeqReaderPos( &reader, 0 );
    for( int i = 0; i < weak->total; i++ )
    {
        CvBoostTree* wtree;
        CV_READ_SEQ_ELEM( wtree, reader );
        sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
    }
    if( !returnSum )
        sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
    return (float)sum;
}
bool CvCascadeBoost::set_params( const CvBoostParams& _params )
{
    minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
    maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
    return ( ( minHitRate > 0 ) && ( minHitRate < 1 ) &&
             ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1 ) &&
             CvBoost::set_params( _params ) );
}
void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    int n = data->sample_count;
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cvStackAlloc(data->sample_count*sizeof(sampleIdxBuf[0]));
        data->get_sample_indices( data->data_root, sampleIdxBuf, &sampleIdx );
    }
    CvMat* buf = data->buf;
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = data->get_resp_int_buf();
        const int* classLabels = 0;
        data->get_class_labels(data->data_root, classLabelsBuf, &classLabels);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        float* responses_buf = data->get_resp_float_buf();
        const float* responses = 0;
        data->get_ord_responses(data->data_root, responses_buf, &responses);

        double w0 = 1./n;
        double p[2] = { 1, 1 };

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() deactivate/reactive again some, if need
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }

        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))

            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*
                    scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response = F(x_i).
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses

            const double lbWeightThresh = FLT_EPSILON;
            const double lbZMax = 10.;
            float* responsesBuf = data->get_resp_float_buf();
            const float* responses = 0;
            data->get_ord_responses(data->data_root, responsesBuf, &responses);

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = -1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }

    // renormalize weights
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}
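// isErrDesired() sorts the stage responses of the positive samples and sets
// 'threshold' at index (1 - minHitRate) * numPos, so at least minHitRate of
// the positives score at or above it (ties are counted into numPosTrue); it
// then measures the false-alarm rate of the thresholded stage on the
// negatives and tells the boosting loop whether maxFalseAlarm is reached.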
bool CvCascadeBoost::isErrDesired()
{
    int sCount = data->sample_count,
        numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
    float* eval = (float*) cvStackAlloc( sizeof(eval[0]) * sCount );

    for( int i = 0; i < sCount; i++ )
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
            eval[numPos++] = predict( i, true );
    icvSortFlt( eval, numPos, 0 );
    int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
    threshold = eval[ thresholdIdx ];
    numPosTrue = numPos - thresholdIdx;
    for( int i = thresholdIdx - 1; i >= 0; i--)
        if ( abs( eval[i] - threshold) < FLT_EPSILON )
            numPosTrue++;
    float hitRate = ((float) numPosTrue) / ((float) numPos);

    for( int i = 0; i < sCount; i++ )
    {
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
        {
            numNeg++;
            if( predict( i ) )
                numFalse++;
        }
    }
    float falseAlarm = ((float) numFalse) / ((float) numNeg);

    cout << "|"; cout.width(4); cout << right << weak->total;
    cout << "|"; cout.width(9); cout << right << hitRate;
    cout << "|"; cout.width(9); cout << right << falseAlarm;
    cout << "|" << endl;
    cout << "+----+---------+---------+" << endl;

    return falseAlarm <= maxFalseAlarm;
}
void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
{
    CvCascadeBoostTree* weakTree;
    fs << CC_WEAK_COUNT << weak->total;
    fs << CC_STAGE_THRESHOLD << threshold;
    fs << CC_WEAK_CLASSIFIERS << "[";
    for( int wi = 0; wi < weak->total; wi++ )
    {
        /*sprintf( cmnt, "tree %i", wi );
        cvWriteComment( fs, cmnt, 0 );*/
        weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->write( fs, featureMap );
    }
    fs << "]";
}
bool CvCascadeBoost::read( const FileNode &node,
                           const CvFeatureEvaluator* _featureEvaluator,
                           const CvCascadeBoostParams& _params )
{
    CvMemStorage* storage;
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
    set_params( _params );

    node[CC_STAGE_THRESHOLD] >> threshold;
    FileNode rnode = node[CC_WEAK_CLASSIFIERS];

    storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree();
        tree->read( *it, this, data );
        cvSeqPush( weak, &tree );
    }
    return true;
}
void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
{
    for( int wi = 0; wi < weak->total; wi++ )
    {
        CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->markFeaturesInMap( featureMap );
    }
}