#include "boost.h"
#include "cascadeclassifier.h"
#include <queue>

using namespace std;
using namespace cv;

static inline double logRatio( double val )
{
    const double eps = 1e-5;

    val = max( val, eps );
    val = min( val, 1. - eps );
    return log( val/(1. - val) );
}
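// logRatio(p) is the logit transform log(p/(1-p)) with p clamped to
// [eps, 1-eps]: e.g. logRatio(0.5) == 0 and logRatio(0.995) == log(199) ~= 5.29.
// update_weights() below uses it to turn the Discrete AdaBoost error rate into
// the weak classifier weight C = -logRatio(err).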
#define CV_CMP_FLT(i,j) (i < j)
static CV_IMPLEMENT_QSORT_EX( icvSortFlt, float, CV_CMP_FLT, const float* )

#define CV_CMP_NUM_IDX(i,j) (aux[i] < aux[j])
static CV_IMPLEMENT_QSORT_EX( icvSortIntAux, int, CV_CMP_NUM_IDX, const float* )
static CV_IMPLEMENT_QSORT_EX( icvSortUShAux, unsigned short, CV_CMP_NUM_IDX, const float* )

#define CV_THRESHOLD_EPS (0.00001F)

static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
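// Both constants feed cvCreateMemStorage below: node/split storage is requested
// in blocks of at least 64 KB (MinBlockSize), with 1 KB (BlockSizeDelta) of
// headroom added on top of the computed block size.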
//----------------------------- CascadeBoostParams -------------------------------------------------

CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F ), maxFalseAlarm( 0.5F )
{
    boost_type = CvBoost::GENTLE;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}

CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
void CvCascadeBoostParams::write( FileStorage &fs ) const
{
    String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
    CV_Assert( !boostTypeStr.empty() );
    fs << CC_BOOST_TYPE << boostTypeStr;
    fs << CC_MINHITRATE << minHitRate;
    fs << CC_MAXFALSEALARM << maxFalseAlarm;
    fs << CC_TRIM_RATE << weight_trim_rate;
    fs << CC_MAX_DEPTH << max_depth;
    fs << CC_WEAK_COUNT << weak_count;
}
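// With the defaults this writes, into the stage-params node of the cascade
// file (key and value strings assuming the usual CC_* definitions in
// cascadeclassifier.h):
//   boostType: GAB, minHitRate: 0.995, maxFalseAlarm: 0.5,
//   weightTrimRate: 0.95, maxDepth: 1, maxWeakCount: 100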
bool CvCascadeBoostParams::read( const FileNode &node )
{
    String boostTypeStr;
    FileNode rnode = node[CC_BOOST_TYPE];
    rnode >> boostTypeStr;
    boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                 !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                 !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                 !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
    if (boost_type == -1)
        CV_Error( CV_StsBadArg, "unsupported Boost type" );
    node[CC_MINHITRATE] >> minHitRate;
    node[CC_MAXFALSEALARM] >> maxFalseAlarm;
    node[CC_TRIM_RATE] >> weight_trim_rate;
    node[CC_MAX_DEPTH] >> max_depth;
    node[CC_WEAK_COUNT] >> weak_count;
    if ( minHitRate <= 0 || minHitRate > 1 ||
         maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
         weight_trim_rate <= 0 || weight_trim_rate > 1 ||
         max_depth <= 0 || weak_count <= 0 )
        CV_Error( CV_StsBadArg, "bad parameters range" );
    return true;
}
void CvCascadeBoostParams::printDefaults() const
{
    cout << "--boostParams--" << endl;
    cout << "  [-bt <{" << CC_DISCRETE_BOOST << ", "
                        << CC_REAL_BOOST << ", "
                        << CC_LOGIT_BOOST << ", "
                        << CC_GENTLE_BOOST << "(default)}>]" << endl;
    cout << "  [-minHitRate <min_hit_rate = " << minHitRate << ">]" << endl;
    cout << "  [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
    cout << "  [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
    cout << "  [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
    cout << "  [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
}
void CvCascadeBoostParams::printAttrs() const
{
    String boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
                          boost_type == CvBoost::REAL ? CC_REAL_BOOST :
                          boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
                          boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : String();
    CV_Assert( !boostTypeStr.empty() );
    cout << "boostType: " << boostTypeStr << endl;
    cout << "minHitRate: " << minHitRate << endl;
    cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
    cout << "weightTrimRate: " << weight_trim_rate << endl;
    cout << "maxTreeDepth: " << max_depth << endl;
    cout << "maxWeakCount: " << weak_count << endl;
}
bool CvCascadeBoostParams::scanAttr( const String prmName, const String val )
{
    bool res = true;

    if( !prmName.compare( "-bt" ) )
    {
        boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
                     !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
                     !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
                     !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
        if (boost_type == -1)
            res = false;
    }
    else if( !prmName.compare( "-minHitRate" ) )
    {
        minHitRate = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-maxFalseAlarmRate" ) )
    {
        maxFalseAlarm = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-weightTrimRate" ) )
    {
        weight_trim_rate = (float) atof( val.c_str() );
    }
    else if( !prmName.compare( "-maxDepth" ) )
    {
        max_depth = atoi( val.c_str() );
    }
    else if( !prmName.compare( "-maxWeakCount" ) )
    {
        weak_count = atoi( val.c_str() );
    }
    else
        res = false;

    return res;
}
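// Typical use from the traincascade argument loop (illustrative values only):
//   params.scanAttr( "-minHitRate", "0.999" );  // true: parameter consumed
//   params.scanAttr( "-numStages", "20" );      // false: not a boost parameter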
//---------------------------- CascadeBoostTrainData -----------------------------

CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  const CvDTreeParams& _params )
{
    is_classifier = true;
    var_all = var_count = (int)_featureEvaluator->getNumFeatures();

    featureEvaluator = _featureEvaluator;

    set_params( _params );
    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;

    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0, max_c_count - 33)/32)*sizeof(int), sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
    split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
}
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  int _numSamples,
                                                  int _precalcValBufSize, int _precalcIdxBufSize,
                                                  const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                       int _numSamples,
                                       int _precalcValBufSize, int _precalcIdxBufSize,
                                       const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    rng = &cv::theRNG();

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    _resp = featureEvaluator->getCls();
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1
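    // (One possible form of that check -- not in the original code -- assuming a
    // continuous single-row response matrix:
    //     for( int si = 0; si < _numSamples; si++ )
    //         CV_Assert( _resp.data.fl[si] == 0.0F || _resp.data.fl[si] == 1.0F );
    // )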
    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0 )
        CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures();
    sample_count = _numSamples;

    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true; // sample indices fit into 16 bits

    numPrecalcVal = min( (_precalcValBufSize*1048576) / int(sizeof(float)*sample_count), var_count );
    numPrecalcIdx = min( (_precalcIdxBufSize*1048576) /
                         int((is_buf_16u ? sizeof(unsigned short) : sizeof(int))*sample_count), var_count );
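    // Example: with _precalcValBufSize = 256 (MB) and sample_count = 20000,
    // numPrecalcVal = min( 256*1048576/(4*20000), var_count )
    //              = min( 3355, var_count ) feature rows cached in valCache.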
    valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1; // +1 for cv_labels
    buf_size = (work_var_count + 1) * sample_count;
    buf_count = 2;

    if ( is_buf_16u )
        buf = cvCreateMat( buf_count, buf_size, CV_16UC1 );
    else
        buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
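    // buf layout: one stripe of sample_count entries per work variable --
    // stripes [0, numPrecalcIdx) hold the presorted sample indices of the
    // precalculated features, stripe work_var_count-1 the cv_labels, and
    // stripe work_var_count the sample indices (see get_cv_labels() and
    // get_sample_indices() below).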
    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0, sample_count - 33)/32)*sizeof(int), sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample indices
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + work_var_count*sample_count);
    else
        idst = buf->data.i + work_var_count*sample_count;

    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    cat_count->data.i[cat_var_count] = 2;

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0, max_c_count - 33)/32)*sizeof(int), sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );
}
void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}
const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf )
{
    int nodeSampleCount = n->sample_count;
    int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );

    int* sampleIndicesBuf = labelsBuf; // the buffer is safe to reuse: each index is read before the label overwrites it
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
    for( int si = 0; si < nodeSampleCount; si++ )
    {
        int sidx = sampleIndices[si];
        labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
    }
    return labelsBuf;
}
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}

const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
        const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
    int nodeSampleCount = n->sample_count;
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcIdx )
    {
        if( !is_buf_16u )
            *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
        else
        {
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
                                                                         vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];
            *sortedIndices = sortedIndicesBuf;
        }

        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = valCache.at<float>( vi, idx );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx );
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        // use sample_indices as temporary buffer for values
        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                ((float*)sampleIndices)[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                ((float*)sampleIndices)[i] = (*featureEvaluator)( vi, sampleIndices[i] );
            }
        }
        // only the first nodeSampleCount entries are initialized, so sort
        // exactly that many (sorting sample_count entries would read garbage)
        icvSortIntAux( sortedIndicesBuf, nodeSampleCount, (float *)sampleIndices );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = ((float*)sampleIndices)[sortedIndicesBuf[i]];
        *sortedIndices = sortedIndicesBuf;
    }

    *ordValues = ordValuesBuf;
}
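// Summary of the cases above: sorted indices are read from buf when
// vi < numPrecalcIdx and built (then sorted) on the fly otherwise; values are
// read from valCache when vi < numPrecalcVal and recomputed through the
// featureEvaluator otherwise.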
const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
{
    int nodeSampleCount = n->sample_count;
    int* sampleIndicesBuf = catValuesBuf; // the buffer is safe to reuse here as well
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcVal )
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i] );
    }
    else
    {
        for( int i = 0; i < nodeSampleCount; i++ )
            catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
    }
    return catValuesBuf;
}

float CvCascadeBoostTrainData::getVarValue( int vi, int si )
{
    if ( vi < numPrecalcVal && !valCache.empty() )
        return valCache.at<float>( vi, si );
    return (*featureEvaluator)( vi, si );
}
struct FeatureIdxOnlyPrecalc
{
    FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _feval, CvMat* _buf, int _sample_count, bool _is_buf_16u )
    {
        feval = _feval;
        sample_count = _sample_count;
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const BlockedRange& range ) const
    {
        cv::AutoBuffer<float> valCache(sample_count);
        float* valCachePtr = (float*)valCache;
        for ( int fi = range.begin(); fi < range.end(); fi++ )
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCachePtr[si] = (*feval)( fi, si );
                if ( is_buf_16u )
                    *(udst + fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + fi*sample_count + si) = si;
            }
            if ( is_buf_16u )
                icvSortUShAux( udst + fi*sample_count, sample_count, valCachePtr );
            else
                icvSortIntAux( idst + fi*sample_count, sample_count, valCachePtr );
        }
    }
    const CvFeatureEvaluator* feval;
    int sample_count;
    int* idst;
    unsigned short* udst;
    bool is_buf_16u;
};
struct FeatureValAndIdxPrecalc
{
    FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _feval, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
    {
        feval = _feval;
        valCache = _valCache;
        sample_count = _sample_count;
        udst = (unsigned short*)_buf->data.s;
        idst = _buf->data.i;
        is_buf_16u = _is_buf_16u;
    }
    void operator()( const BlockedRange& range ) const
    {
        for ( int fi = range.begin(); fi < range.end(); fi++ )
        {
            for( int si = 0; si < sample_count; si++ )
            {
                valCache->at<float>(fi,si) = (*feval)( fi, si );
                if ( is_buf_16u )
                    *(udst + fi*sample_count + si) = (unsigned short)si;
                else
                    *(idst + fi*sample_count + si) = si;
            }
            if ( is_buf_16u )
                icvSortUShAux( udst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
            else
                icvSortIntAux( idst + fi*sample_count, sample_count, valCache->ptr<float>(fi) );
        }
    }
    const CvFeatureEvaluator* feval;
    Mat* valCache;
    int sample_count;
    int* idst;
    unsigned short* udst;
    bool is_buf_16u;
};
struct FeatureValOnlyPrecalc
{
    FeatureValOnlyPrecalc( const CvFeatureEvaluator* _feval, Mat* _valCache, int _sample_count )
    {
        feval = _feval;
        valCache = _valCache;
        sample_count = _sample_count;
    }
    void operator()( const BlockedRange& range ) const
    {
        for ( int fi = range.begin(); fi < range.end(); fi++ )
            for( int si = 0; si < sample_count; si++ )
                valCache->at<float>(fi,si) = (*feval)( fi, si );
    }
    const CvFeatureEvaluator* feval;
    Mat* valCache;
    int sample_count;
};
void CvCascadeBoostTrainData::precalculate()
{
    int minNum = MIN( numPrecalcVal, numPrecalcIdx );
    CV_DbgAssert( !valCache.empty() );

    double proctime = -TIME( 0 );
    parallel_for( BlockedRange(numPrecalcVal, numPrecalcIdx),
                  FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u) );
    parallel_for( BlockedRange(0, minNum),
                  FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u) );
    parallel_for( BlockedRange(minNum, numPrecalcVal),
                  FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
    cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
}
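// Ranges covered above: features [0, minNum) get both cached values and cached
// sorted indices, [minNum, numPrecalcVal) cached values only, and
// [numPrecalcVal, numPrecalcIdx) cached sorted indices only; anything beyond is
// recomputed on demand in get_ord_var_data()/get_cat_var_data().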
//-------------------------------- CascadeBoostTree ----------------------------------------

CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
{
    CvDTreeNode* node = root;
    if( !node )
        CV_Error( CV_StsError, "The tree has not been trained yet" );

    if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = val <= split->ord.c ? node->left : node->right;
        }
    }
    else // categorical
    {
        while( node->left )
        {
            CvDTreeSplit* split = node->split;
            int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
            node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
        }
    }
    return node;
}
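// The returned leaf's value is the weak classifier's response; CvCascadeBoost::predict()
// below accumulates it as: sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value.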
void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    queue<CvDTreeNode*> internalNodesQueue;
    int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth );
    std::vector<float> leafVals(size);
    int leafValIdx = 0;
    int internalNodeIdx = 1;
    CvDTreeNode* tempNode;

    CV_DbgAssert( root );
    internalNodesQueue.push( root );

    fs << "{";
    fs << CC_INTERNAL_NODES << "[:";
    while (!internalNodesQueue.empty())
    {
        tempNode = internalNodesQueue.front();
        CV_Assert( tempNode->left );
        if ( !tempNode->left->left && !tempNode->left->right ) // left node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->left->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->left );
            fs << internalNodeIdx++;
        }
        CV_Assert( tempNode->right );
        if ( !tempNode->right->left && !tempNode->right->right ) // right node is leaf
        {
            leafVals[-leafValIdx] = (float)tempNode->right->value;
            fs << leafValIdx--;
        }
        else
        {
            internalNodesQueue.push( tempNode->right );
            fs << internalNodeIdx++;
        }
        int fidx = tempNode->split->var_idx;
        fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
        fs << fidx;
        if ( !maxCatCount )
            fs << tempNode->split->ord.c;
        else
            for( int i = 0; i < subsetN; i++ )
                fs << tempNode->split->subset[i];
        internalNodesQueue.pop();
    }
    fs << "]"; // CC_INTERNAL_NODES

    fs << CC_LEAF_VALUES << "[:";
    for (int ni = 0; ni < -leafValIdx; ni++)
        fs << leafVals[ni];
    fs << "]"; // CC_LEAF_VALUES
    fs << "}";
}
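// Resulting storage layout (consumed by read() below): internalNodes holds one
// record per internal node, [leftIdx, rightIdx, featureIdx, threshold] for
// ordered features or [leftIdx, rightIdx, featureIdx, subset x subsetN] for
// categorical ones. A positive child index points to another internal node in
// this list; a non-positive index -k addresses leafValues[k].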
void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
                               CvDTreeTrainData* _data )
{
    int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
    int subsetN = (maxCatCount + 31)/32;
    int step = 3 + ( maxCatCount > 0 ? subsetN : 1 );

    queue<CvDTreeNode*> internalNodesQueue;
    FileNodeIterator internalNodesIt, leafValuesIt;
    CvDTreeNode* prntNode, *cldNode;

    clear();
    data = _data;
    ensemble = _ensemble;
    pruned_tree_idx = 0;

    // read tree nodes, back to front: the record written last is read first
    FileNode rnode = node[CC_INTERNAL_NODES];
    internalNodesIt = rnode.end();
    leafValuesIt = node[CC_LEAF_VALUES].end();
    internalNodesIt--; leafValuesIt--;
    for( size_t i = 0; i < rnode.size()/step; i++ )
    {
        prntNode = data->new_node( 0, 0, 0, 0 );
        if ( maxCatCount > 0 ) // categorical
        {
            prntNode->split = data->new_split_cat( 0, 0 );
            for( int j = subsetN-1; j >= 0; j-- )
            {
                *internalNodesIt >> prntNode->split->subset[j]; internalNodesIt--;
            }
        }
        else // ordered
        {
            float split_value;
            *internalNodesIt >> split_value; internalNodesIt--;
            prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0 );
        }
        *internalNodesIt >> prntNode->split->var_idx; internalNodesIt--;
        int ridx, lidx;
        *internalNodesIt >> ridx; internalNodesIt--;
        *internalNodesIt >> lidx; internalNodesIt--;
        if ( ridx <= 0 )
        {
            prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValuesIt >> cldNode->value; leafValuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->right = internalNodesQueue.front();
            prntNode->right->parent = prntNode;
            internalNodesQueue.pop();
        }

        if ( lidx <= 0 )
        {
            prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
            *leafValuesIt >> cldNode->value; leafValuesIt--;
            cldNode->parent = prntNode;
        }
        else
        {
            prntNode->left = internalNodesQueue.front();
            prntNode->left->parent = prntNode;
            internalNodesQueue.pop();
        }

        internalNodesQueue.push( prntNode );
    }

    root = internalNodesQueue.front();
    internalNodesQueue.pop();
}
void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
{
    int n = node->sample_count, nl, nr, scount = data->sample_count;
    char* dir = (char*)data->direction->data.ptr;
    CvDTreeNode *left = 0, *right = 0;
    int* newIdx = data->split_buf->data.i;
    int newBufIdx = data->get_child_buf_idx( node );
    int workVarCount = data->get_work_var_count();
    CvMat* buf = data->buf;
    cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int) + sizeof(float)));
    int* tempBuf = (int*)(uchar*)inn_buf;
    bool splitInputData;

    complete_node_dir(node);

    for( int i = nl = nr = 0; i < n; i++ )
    {
        int d = dir[i];
        // initialize new indices for splitting ordered variables
        newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
        nr += d;
        nl += 1-d;
    }
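    // Branch-free select above: d is 0 (left) or 1 (right). For d == 0, d-1 is
    // all ones and -d is zero, so newIdx[i] = nl; for d == 1, d-1 is zero and
    // -d is all ones, so newIdx[i] = nr. Each sample therefore receives its
    // future position inside its child's still-sorted index stripes.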
    node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
    node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );

    splitInputData = node->depth + 1 < data->params.max_depth &&
        (node->left->sample_count > data->params.min_sample_count ||
         node->right->sample_count > data->params.min_sample_count);
    // split ordered variables, keep both halves sorted.
    for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
    {
        int ci = data->get_var_type(vi);
        if( ci >= 0 || !splitInputData )
            continue;

        int n1 = node->get_num_valid(vi);
        float* src_val_buf = (float*)(tempBuf + n);
        int* src_sorted_idx_buf = (int*)(src_val_buf + n);
        int* src_sample_idx_buf = src_sorted_idx_buf + n;
        const int* src_sorted_idx = 0;
        const float* src_val = 0;
        data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);

        for(int i = 0; i < n; i++)
            tempBuf[i] = src_sorted_idx[i];

        if (data->is_buf_16u)
        {
            unsigned short *ldst, *rdst, *ldst0, *rdst0;
            ldst0 = ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
                vi*scount + left->offset);
            rdst0 = rdst = (unsigned short*)(ldst + nl);

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                    *rdst++ = (unsigned short)idx;
                else
                    *ldst++ = (unsigned short)idx;
            }
            left->set_num_valid(vi, (int)(ldst - ldst0));
            right->set_num_valid(vi, (int)(rdst - rdst0));
        }
        else
        {
            int *ldst0, *ldst, *rdst0, *rdst;
            ldst0 = ldst = buf->data.i + left->buf_idx*buf->cols +
                vi*scount + left->offset;
            rdst0 = rdst = buf->data.i + right->buf_idx*buf->cols +
                vi*scount + right->offset;

            // split sorted
            for( int i = 0; i < n1; i++ )
            {
                int idx = tempBuf[i];
                int d = dir[idx];
                idx = newIdx[idx];
                if (d)
                    *rdst++ = idx;
                else
                    *ldst++ = idx;
            }
            left->set_num_valid(vi, (int)(ldst - ldst0));
            right->set_num_valid(vi, (int)(rdst - rdst0));
        }
    }
    // split cv_labels using newIdx relocation table
    int* src_lbls_buf = tempBuf + n;
    const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = src_lbls[i];

    if (data->is_buf_16u)
    {
        unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*buf->cols +
            (workVarCount-1)*scount + left->offset);
        unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*buf->cols +
            (workVarCount-1)*scount + right->offset);

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
                *rdst++ = (unsigned short)idx;
            else
                *ldst++ = (unsigned short)idx;
        }
    }
    else
    {
        int *ldst = buf->data.i + left->buf_idx*buf->cols +
            (workVarCount-1)*scount + left->offset;
        int *rdst = buf->data.i + right->buf_idx*buf->cols +
            (workVarCount-1)*scount + right->offset;

        for( int i = 0; i < n; i++ )
        {
            int idx = tempBuf[i];
            if (dir[i])
                *rdst++ = idx;
            else
                *ldst++ = idx;
        }
    }
    for( int vi = 0; vi < data->var_count; vi++ )
    {
        left->set_num_valid(vi, (int)(nl));
        right->set_num_valid(vi, (int)(nr));
    }

    // split sample indices
    int *sampleIdx_src_buf = tempBuf + n;
    const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);

    for(int i = 0; i < n; i++)
        tempBuf[i] = sampleIdx_src[i];

    if (data->is_buf_16u)
    {
        unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*buf->cols +
            workVarCount*scount + left->offset);
        unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*buf->cols +
            workVarCount*scount + right->offset);
        for (int i = 0; i < n; i++)
        {
            unsigned short idx = (unsigned short)tempBuf[i];
            if (dir[i])
                *rdst++ = idx;
            else
                *ldst++ = idx;
        }
    }
    else
    {
        int* ldst = buf->data.i + left->buf_idx*buf->cols +
            workVarCount*scount + left->offset;
        int* rdst = buf->data.i + right->buf_idx*buf->cols +
            workVarCount*scount + right->offset;
        for (int i = 0; i < n; i++)
        {
            int idx = tempBuf[i];
            if (dir[i])
                *rdst++ = idx;
            else
                *ldst++ = idx;
        }
    }

    // deallocate the parent node data that is not needed anymore
    data->free_node_data(node);
}
void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap )
{
    if ( node && node->split )
    {
        featureMap.ptr<int>(0)[node->split->var_idx] = 1;
        auxMarkFeaturesInMap( node->left, featureMap );
        auxMarkFeaturesInMap( node->right, featureMap );
    }
}

void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
{
    auxMarkFeaturesInMap( root, featureMap );
}
//----------------------------------- CascadeBoost --------------------------------------

bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
                            int _numSamples,
                            int _precalcValBufSize, int _precalcIdxBufSize,
                            const CvCascadeBoostParams& _params )
{
    CV_Assert( !data );
    clear();
    data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
                                        _precalcValBufSize, _precalcIdxBufSize, _params );
    CvMemStorage *storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    storage = 0;

    set_params( _params );
    if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
        data->do_responses_copy();

    update_weights( 0 );

    cout << "+----+---------+---------+" << endl;
    cout << "|  N |    HR   |    FA   |" << endl;
    cout << "+----+---------+---------+" << endl;

    do
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree;
        if( !tree->train( data, subsample_mask, this ) )
        {
            // TODO: may be should finish the loop (!!!)
            delete tree;
            continue;
        }
        cvSeqPush( weak, &tree );
        update_weights( tree );
        trim_weights();
    }
    while( !isErrDesired() && (weak->total < params.weak_count) );

    data->is_classifier = true;
    data->free_train_data();
    return true;
}
float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
{
    CV_Assert( weak );
    double sum = 0;
    CvSeqReader reader;
    cvStartReadSeq( weak, &reader );
    cvSetSeqReaderPos( &reader, 0 );
    for( int i = 0; i < weak->total; i++ )
    {
        CvBoostTree* wtree;
        CV_READ_SEQ_ELEM( wtree, reader );
        sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
    }
    if( !returnSum )
        sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
    return (float)sum;
}
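// With returnSum == false the stage votes: a sample passes (1.0) iff the sum of
// weak responses reaches the stage threshold selected by isErrDesired() below;
// CV_THRESHOLD_EPS absorbs floating-point rounding in the comparison.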
bool CvCascadeBoost::set_params( const CvBoostParams& _params )
{
    minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
    maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
    return ( ( minHitRate > 0 ) && ( minHitRate < 1 ) &&
             ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1 ) &&
             CvBoost::set_params( _params ) );
}
void CvCascadeBoost::update_weights( CvBoostTree* tree )
{
    int n = data->sample_count;
    double sumW = 0.;
    int step = 0;
    float* fdata = 0;
    int *sampleIdxBuf;
    const int* sampleIdx = 0;
    int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
                       ( !tree ? n*sizeof(int) : 0 );
    cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
    uchar* cur_inn_buf_pos = (uchar*)inn_buf;
    if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
    {
        step = CV_IS_MAT_CONT(data->responses_copy->type) ?
            1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
        fdata = data->responses_copy->data.fl;
        sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
        sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
    }
    CvMat* buf = data->buf;
    if( !tree ) // before training the first tree, initialize weights and other parameters
    {
        int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
        const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
        // in case of logitboost and gentle adaboost each weak tree is a regression tree,
        // so we need to convert class labels to floating-point values
        double w0 = 1./n;
        double p[2] = { 1, 1 };

        cvReleaseMat( &orig_response );
        cvReleaseMat( &sum_response );
        cvReleaseMat( &weak_eval );
        cvReleaseMat( &subsample_mask );
        cvReleaseMat( &weights );

        orig_response = cvCreateMat( 1, n, CV_32S );
        weak_eval = cvCreateMat( 1, n, CV_64F );
        subsample_mask = cvCreateMat( 1, n, CV_8U );
        weights = cvCreateMat( 1, n, CV_64F );
        subtree_weights = cvCreateMat( 1, n + 2, CV_64F );

        if (data->is_buf_16u)
        {
            unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count);
            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                // make all the samples active at start.
                // later, in trim_weights() some are deactivated/reactivated again, if needed
                subsample_mask->data.ptr[i] = (uchar)1;
                // make all the initial weights the same.
                weights->data.db[i] = w0*p[classLabels[i]];
                // set the labels to find (from within weak tree learning proc)
                // the particular sample weight, and where to store the response.
                labels[i] = (unsigned short)i;
            }
        }
        else
        {
            int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
                data->data_root->offset + (data->work_var_count-1)*data->sample_count;

            for( int i = 0; i < n; i++ )
            {
                // save original categorical responses {0,1}, convert them to {-1,1}
                orig_response->data.i[i] = classLabels[i]*2 - 1;
                subsample_mask->data.ptr[i] = (uchar)1;
                weights->data.db[i] = w0*p[classLabels[i]];
                labels[i] = i;
            }
        }

        if( params.boost_type == LOGIT )
        {
            sum_response = cvCreateMat( 1, n, CV_64F );

            for( int i = 0; i < n; i++ )
            {
                sum_response->data.db[i] = 0;
                fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
            }

            // in case of logitboost each weak tree is a regression tree.
            // the target function values are recalculated for each of the trees
            data->is_classifier = false;
        }
        else if( params.boost_type == GENTLE )
        {
            for( int i = 0; i < n; i++ )
                fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];

            data->is_classifier = false;
        }
    }
    else
    {
        // at this moment, for all the samples that participated in the training of the most
        // recent weak classifier we know the responses. For other samples we need to compute them
        if( have_subsample )
        {
            // invert the subsample mask
            cvXorS( subsample_mask, cvScalar(1.), subsample_mask );

            // run tree through all the non-processed samples
            for( int i = 0; i < n; i++ )
                if( subsample_mask->data.ptr[i] )
                {
                    weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
                }
        }
        // now update weights and other parameters for each type of boosting
        if( params.boost_type == DISCRETE )
        {
            // Discrete AdaBoost:
            //   weak_eval[i] (=f(x_i)) is in {-1,1}
            //   err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
            //   C = log((1-err)/err)
            //   w_i *= exp(C*(f(x_i) != y_i))
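            //
            // Worked example: err = 0.25 gives C = log(0.75/0.25) = log(3) ~= 1.1,
            // so each misclassified sample has its weight multiplied by
            // exp(C) = 3 (scale[1]) while correct ones keep theirs (scale[0] = 1).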
            double C, err = 0.;
            double scale[] = { 1., 0. };

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i];
                sumW += w;
                err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
            }

            if( sumW != 0 )
                err /= sumW;
            C = err = -logRatio( err );
            scale[1] = exp(err);

            sumW = 0;
            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*
                           scale[weak_eval->data.db[i] != orig_response->data.i[i]];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == REAL )
        {
            // Real AdaBoost:
            //   weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i) = P(y=1|x_i)
            //   w_i *= exp(-y_i*f(x_i))
            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i]*weak_eval->data.db[i];
                sumW += w;
                weights->data.db[i] = w;
            }
        }
        else if( params.boost_type == LOGIT )
        {
            // LogitBoost:
            //   weak_eval[i] = f(x_i) in [-z_max,z_max]
            //   sum_response[i] = F(x_i)
            //   F(x_i) += 0.5*f(x_i)
            //   p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))) = 1/(1+exp(-2*F(x_i)))
            //   reuse weak_eval: weak_eval[i] <- p(x_i)
            //   w_i = p(x_i)*(1 - p(x_i))
            //   z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
            //   store z_i to the data->data_root as the new target responses
            const double lbWeightThresh = FLT_EPSILON;
            const double lbZMax = 10.;

            for( int i = 0; i < n; i++ )
            {
                double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
                sum_response->data.db[i] = s;
                weak_eval->data.db[i] = -2*s;
            }

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double p = 1./(1. + weak_eval->data.db[i]);
                double w = p*(1 - p), z;
                w = MAX( w, lbWeightThresh );
                weights->data.db[i] = w;
                sumW += w;
                if( orig_response->data.i[i] > 0 )
                {
                    z = 1./p;
                    fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
                }
                else
                {
                    z = 1./(1-p);
                    fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
                }
            }
        }
        else
        {
            // Gentle AdaBoost:
            //   weak_eval[i] = f(x_i) in [-1,1]
            //   w_i *= exp(-y_i*f(x_i))
            assert( params.boost_type == GENTLE );

            for( int i = 0; i < n; i++ )
                weak_eval->data.db[i] *= -orig_response->data.i[i];

            cvExp( weak_eval, weak_eval );

            for( int i = 0; i < n; i++ )
            {
                double w = weights->data.db[i] * weak_eval->data.db[i];
                weights->data.db[i] = w;
                sumW += w;
            }
        }
    }
    // renormalize weights
    if( sumW > FLT_EPSILON )
    {
        sumW = 1./sumW;
        for( int i = 0; i < n; ++i )
            weights->data.db[i] *= sumW;
    }
}
bool CvCascadeBoost::isErrDesired()
{
    int sCount = data->sample_count,
        numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
    float* eval = (float*) cvStackAlloc( sizeof(eval[0]) * sCount );

    for( int i = 0; i < sCount; i++ )
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
            eval[numPos++] = predict( i, true );
    icvSortFlt( eval, numPos, 0 );
    int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
    threshold = eval[ thresholdIdx ];
    numPosTrue = numPos - thresholdIdx;
    for( int i = thresholdIdx - 1; i >= 0; i-- )
        if ( abs( eval[i] - threshold ) < FLT_EPSILON )
            numPosTrue++;
    float hitRate = ((float) numPosTrue) / ((float) numPos);

    for( int i = 0; i < sCount; i++ )
    {
        if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
        {
            numNeg++;
            if( predict( i ) )
                numFalse++;
        }
    }
    float falseAlarm = ((float) numFalse) / ((float) numNeg);

    cout << "|"; cout.width(4); cout << right << weak->total;
    cout << "|"; cout.width(9); cout << right << hitRate;
    cout << "|"; cout.width(9); cout << right << falseAlarm;
    cout << "|" << endl;
    cout << "+----+---------+---------+" << endl;

    return falseAlarm <= maxFalseAlarm;
}
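// Example of the threshold selection in isErrDesired(): with minHitRate = 0.995
// and numPos = 1000 positives, thresholdIdx = (int)(0.005f*1000) = 5, so the
// stage threshold becomes the 6th smallest positive response; at most 5
// positives (0.5%) fall below it, giving hitRate >= 0.995 on the training set.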
void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
{
    CvCascadeBoostTree* weakTree;
    fs << CC_WEAK_COUNT << weak->total;
    fs << CC_STAGE_THRESHOLD << threshold;
    fs << CC_WEAK_CLASSIFIERS << "[";
    for( int wi = 0; wi < weak->total; wi++ )
    {
        /*sprintf( cmnt, "tree %i", wi );
        cvWriteComment( fs, cmnt, 0 );*/
        weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->write( fs, featureMap );
    }
    fs << "]";
}
bool CvCascadeBoost::read( const FileNode &node,
                           const CvFeatureEvaluator* _featureEvaluator,
                           const CvCascadeBoostParams& _params )
{
    CvMemStorage* storage;
    clear();

    data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
    set_params( _params );

    node[CC_STAGE_THRESHOLD] >> threshold;
    FileNode rnode = node[CC_WEAK_CLASSIFIERS];

    storage = cvCreateMemStorage();
    weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
    for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
    {
        CvCascadeBoostTree* tree = new CvCascadeBoostTree();
        tree->read( *it, this, data );
        cvSeqPush( weak, &tree );
    }
    return true;
}
void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
{
    for( int wi = 0; wi < weak->total; wi++ )
    {
        CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
        weakTree->markFeaturesInMap( featureMap );
    }
}