buf = cvCreateMat( buf_count, buf_size, CV_32SC1 );
cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );
- pred_float_buf.resize(maxNumThreads);
- pred_int_buf.resize(maxNumThreads);
- resp_float_buf.resize(maxNumThreads);
- resp_int_buf.resize(maxNumThreads);
- cv_lables_buf.resize(maxNumThreads);
- sample_idx_buf.resize(maxNumThreads);
- for( int ti = 0; ti < maxNumThreads; ti++ )
- {
- pred_float_buf[ti].resize(sample_count);
- pred_int_buf[ti].resize(sample_count);
- resp_float_buf[ti].resize(sample_count);
- resp_int_buf[ti].resize(sample_count);
- cv_lables_buf[ti].resize(sample_count);
- sample_idx_buf[ti].resize(sample_count);
- }
// precalculate valCache and set indices in buf
precalculate();
valCache.release();
}
// Fills labelsBuf with one integer label per sample of node n: for each of the
// n->sample_count samples, the sample index is looked up and the matching entry of
// `responses` is converted to int.
// Patch: the result is now returned instead of being written through the `labels`
// out-parameter, and the internal get_sample_idx_buf() scratch buffer is no longer used.
-void CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf, const int** labels )
+const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
{
int nodeSampleCount = n->sample_count;
- int* sampleIndicesBuf = get_sample_idx_buf();
- const int* sampleIndices = 0;
// Element stride of `responses`: 1 for a continuous matrix, otherwise step / element size.
int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
- get_sample_indices(n, sampleIndicesBuf, &sampleIndices);
-
+ int* sampleIndicesBuf = labelsBuf; // labelsBuf is also handed out as scratch space for the sample indices
+ const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
// sampleIndices may alias labelsBuf; each iteration reads entry si before writing entry si.
for( int si = 0; si < nodeSampleCount; si++ )
{
int sidx = sampleIndices[si];
labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
}
- *labels = labelsBuf;
+ return labelsBuf;
}
// Returns the sample indices of node n by forwarding to the base-class
// CvDTreeTrainData::get_cat_var_data with variable index get_work_var_count().
// Patch: the pointer is returned directly instead of through the `indices` out-parameter.
-void CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf, const int** indices )
+const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
- CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf, indices );
+ return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}
// Returns the cross-validation labels of node n by forwarding to the base-class
// CvDTreeTrainData::get_cat_var_data with variable index get_work_var_count() - 1.
// Patch: the pointer is returned directly instead of through the `labels` out-parameter.
-void CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf, const int** labels )
+const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
- CvDTreeTrainData::get_cat_var_data( n, get_work_var_count()- 1, labels_buf, labels );
+ return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}
// Provides the values of variable vi for the samples of node n, together with an index
// array, through the out-parameters ordValues and sortedIndices.
// Patch: the return type becomes void (the old version always returned 0), the index
// parameters are renamed to sorted*, and the caller now supplies sampleIndicesBuf in place
// of the internal get_sample_idx_buf() scratch buffer.
// NOTE(review): the braces in this excerpt do not balance, so some lines between the
// branches (presumably the `else` parts) are not visible here - confirm against the full file.
-int CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* indicesBuf,
- const float** ordValues, const int** indices )
+void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
+ const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
int nodeSampleCount = n->sample_count;
- int* sampleIndicesBuf = get_sample_idx_buf();
- const int* sampleIndices = 0;
- get_sample_indices(n, sampleIndicesBuf, &sampleIndices);
+ const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
if ( vi < numPrecalcIdx )
{
// int storage: point straight into buf, nothing is copied
if( !is_buf_16u )
- *indices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
+ *sortedIndices = buf->data.i + n->buf_idx*buf->cols + vi*sample_count + n->offset;
else
{
// 16-bit storage: widen each entry into the caller-provided index buffer
const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
vi*sample_count + n->offset );
for( int i = 0; i < nodeSampleCount; i++ )
- indicesBuf[i] = shortIndices[i];
- *indices = indicesBuf;
+ sortedIndicesBuf[i] = shortIndices[i];
+ *sortedIndices = sortedIndicesBuf;
}
if ( vi < numPrecalcVal )
{
// values taken from valCache, addressed through the node-local index -> sample index mapping
for( int i = 0; i < nodeSampleCount; i++ )
{
- int idx = (*indices)[i];
+ int idx = (*sortedIndices)[i];
idx = sampleIndices[idx];
ordValuesBuf[i] = valCache.at<float>( vi, idx);
}
{
// same traversal, but values are computed through featureEvaluator
for( int i = 0; i < nodeSampleCount; i++ )
{
- int idx = (*indices)[i];
+ int idx = (*sortedIndices)[i];
idx = sampleIndices[idx];
ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
}
{
// NOTE(review): the memory behind sampleIndices is reused as float storage here (const is
// cast away); entry i is read as an index before it is overwritten. If get_sample_indices
// returned a pointer other than sampleIndicesBuf, that memory would be clobbered - confirm.
for( int i = 0; i < nodeSampleCount; i++ )
{
- indicesBuf[i] = i;
+ sortedIndicesBuf[i] = i;
((float*)sampleIndices)[i] = valCache.at<float>( vi, sampleIndices[i] );
}
}
{
for( int i = 0; i < nodeSampleCount; i++ )
{
- indicesBuf[i] = i;
+ sortedIndicesBuf[i] = i;
((float*)sampleIndices)[i] = (*featureEvaluator)( vi, sampleIndices[i]);
}
}
// NOTE(review): the sort length is sample_count, whereas the loops above fill only
// nodeSampleCount entries - confirm this is intended.
- icvSortIntAux( indicesBuf, sample_count, (float *)sampleIndices );
+ icvSortIntAux( sortedIndicesBuf, sample_count, (float *)sampleIndices );
// gather the values in the order given by the index array
for( int i = 0; i < nodeSampleCount; i++ )
- ordValuesBuf[i] = ((float*)sampleIndices)[indicesBuf[i]];
- *indices = indicesBuf;
+ ordValuesBuf[i] = ((float*)sampleIndices)[sortedIndicesBuf[i]];
+ *sortedIndices = sortedIndicesBuf;
}
*ordValues = ordValuesBuf;
- return 0;
}
// Fills catValuesBuf with the integer-converted value of variable vi for every sample of
// node n.
// Patch: catValuesBuf is returned directly (the old version stored it in *catValues and
// returned 0), and it also replaces the internal get_sample_idx_buf() scratch buffer.
-int CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf, const int** catValues )
+const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf)
{
int nodeSampleCount = n->sample_count;
- int* sampleIndicesBuf = get_sample_idx_buf();
- const int* sampleIndices = 0;
- get_sample_indices(n, sampleIndicesBuf, &sampleIndices);
+ int* sampleIndicesBuf = catValuesBuf; // catValuesBuf is also handed out as scratch space for the sample indices
+ const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
// Only this branch is visible in the excerpt; sampleIndices may alias catValuesBuf, and
// each iteration reads entry i before writing entry i.
if ( vi < numPrecalcVal )
{
for( int i = 0; i < nodeSampleCount; i++ )
catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
}
- *catValues = catValuesBuf;
-
- return 0;
+ return catValuesBuf;
}
float CvCascadeBoostTrainData::getVarValue( int vi, int si )
int newBufIdx = data->get_child_buf_idx( node );
int workVarCount = data->get_work_var_count();
CvMat* buf = data->buf;
- int* tempBuf = (int*)cvStackAlloc(n*sizeof(tempBuf[0]));
+ cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
+ int* tempBuf = (int*)(uchar*)inn_buf;
bool splitInputData;
complete_node_dir(node);
(node->left->sample_count > data->params.min_sample_count ||
node->right->sample_count > data->params.min_sample_count);
- // split ordered variables, keep both halves sorted.
+ // split ordered variables, keep both halves sorted.
for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
{
int ci = data->get_var_type(vi);
- int n1 = node->get_num_valid(vi);
- int *src_idx_buf = data->get_pred_int_buf();
- const int* src_idx = 0;
- float *src_val_buf = data->get_pred_float_buf();
- const float* src_val = 0;
-
if( ci >= 0 || !splitInputData )
continue;
- data->get_ord_var_data(node, vi, src_val_buf, src_idx_buf, &src_val, &src_idx);
+ int n1 = node->get_num_valid(vi);
+ float *src_val_buf = (float*)(tempBuf + n);
+ int *src_sorted_idx_buf = (int*)(src_val_buf + n);
+ int *src_sample_idx_buf = src_sorted_idx_buf + n;
+ const int* src_sorted_idx = 0;
+ const float* src_val = 0;
+ data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
for(int i = 0; i < n; i++)
- tempBuf[i] = src_idx[i];
+ tempBuf[i] = src_sorted_idx[i];
if (data->is_buf_16u)
{
}
// split cv_labels using newIdx relocation table
- int *src_lbls_buf = data->get_pred_int_buf();
- const int* src_lbls = 0;
- data->get_cv_labels(node, src_lbls_buf, &src_lbls);
+ int *src_lbls_buf = tempBuf + n;
+ const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
for(int i = 0; i < n; i++)
tempBuf[i] = src_lbls[i];
}
// split sample indices
- int *sampleIdx_src_buf = data->get_sample_idx_buf();
- const int* sampleIdx_src = 0;
- data->get_sample_indices(node, sampleIdx_src_buf, &sampleIdx_src);
+ int *sampleIdx_src_buf = tempBuf + n;
+ const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
for(int i = 0; i < n; i++)
tempBuf[i] = sampleIdx_src[i];
float* fdata = 0;
int *sampleIdxBuf;
const int* sampleIdx = 0;
+ int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
+ ( !tree ? n*sizeof(int) : 0 );
+ cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
+ uchar* cur_inn_buf_pos = (uchar*)inn_buf;
if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
{
step = CV_IS_MAT_CONT(data->responses_copy->type) ?
1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
fdata = data->responses_copy->data.fl;
- sampleIdxBuf = (int*)cvStackAlloc(data->sample_count*sizeof(sampleIdxBuf[0]));
- data->get_sample_indices( data->data_root, sampleIdxBuf, &sampleIdx );
+ sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
+ sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
}
CvMat* buf = data->buf;
if( !tree ) // before training the first tree, initialize weights and other parameters
{
- int* classLabelsBuf = data->get_resp_int_buf();
- const int* classLabels = 0;
- data->get_class_labels(data->data_root, classLabelsBuf, &classLabels);
+ int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
+ const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
// in case of logitboost and gentle adaboost each weak tree is a regression tree,
// so we need to convert class labels to floating-point values
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses(data->data_root, responses_buf, &responses);
-
double w0 = 1./n;
double p[2] = { 1, 1 };
const double lbWeightThresh = FLT_EPSILON;
const double lbZMax = 10.;
- float* responsesBuf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses(data->data_root, responsesBuf, &responses);
for( int i = 0; i < n; i++ )
{
const CvDTreeParams& _params=CvDTreeParams() );
void precalculate();
- virtual void get_class_labels( CvDTreeNode* n, int* labelsBuf, const int** labels );
- virtual void get_cv_labels( CvDTreeNode* n, int* labelsBuf, const int** labels );
- virtual void get_sample_indices( CvDTreeNode* n, int* indicesBuf, const int** labels );
+ virtual const int* get_class_labels( CvDTreeNode* n, int* labelsBuf );
+ virtual const int* get_cv_labels( CvDTreeNode* n, int* labelsBuf);
+ virtual const int* get_sample_indices( CvDTreeNode* n, int* indicesBuf );
- virtual int get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* indicesBuf,
- const float** ordValues, const int** indices );
- virtual int get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf, const int** catValues );
+ virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
+ const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf );
+ virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf );
virtual float getVarValue( int vi, int si );
virtual void free_train_data();
// that interferes with a method definition in this header
#undef check
+#include "cvinternal.h"
+
/****************************************************************************************\
* Main struct definitions *
\****************************************************************************************/
int get_var_type(int vi) const;
int get_work_var_count() const {return work_var_count;}
- virtual void get_ord_responses( CvDTreeNode* n, float* values_buf, const float** values );
- virtual void get_class_labels( CvDTreeNode* n, int* labels_buf, const int** labels );
- virtual void get_cv_labels( CvDTreeNode* n, int* labels_buf, const int** labels );
- virtual void get_sample_indices( CvDTreeNode* n, int* indices_buf, const int** labels );
- virtual int get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf, const int** cat_values );
- virtual int get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* indices_buf,
- const float** ord_values, const int** indices );
+ virtual const float* get_ord_responses( CvDTreeNode* n, float* values_buf, int* sample_indices_buf );
+ virtual const int* get_class_labels( CvDTreeNode* n, int* labels_buf );
+ virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
+ virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
+ virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
+ virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* sorted_indices_buf,
+ const float** ord_values, const int** sorted_indices, int* sample_indices_buf );
virtual int get_child_buf_idx( CvDTreeNode* n );
////////////////////////////////////
virtual void free_train_data();
virtual void free_node( CvDTreeNode* node );
- // inner arrays for getting predictors and responses
- float* get_pred_float_buf();
- int* get_pred_int_buf();
- float* get_resp_float_buf();
- int* get_resp_int_buf();
- int* get_cv_lables_buf();
- int* get_sample_idx_buf();
-
- std::vector<std::vector<float> > pred_float_buf;
- std::vector<std::vector<int> > pred_int_buf;
- std::vector<std::vector<float> > resp_float_buf;
- std::vector<std::vector<int> > resp_int_buf;
- std::vector<std::vector<int> > cv_lables_buf;
- std::vector<std::vector<int> > sample_idx_buf;
-
int sample_count, var_all, var_count, max_c_count;
int ord_var_count, cat_var_count, work_var_count;
bool have_labels, have_priors;
CvRNG rng;
};
+class CvDTree;
+class CvForestTree;
+
+namespace cv
+{
+ // Function object that searches for the best split of `node` in `tree` over a BlockedRange.
+ // NOTE(review): the splitting constructor + operator() + join() set looks like a
+ // parallel-reduce body - confirm against the declarations in cvinternal.h.
+ struct DTreeBestSplitFinder
+ {
+ DTreeBestSplitFinder(){ tree = 0; node = 0; } // NOTE(review): splitSize is not assigned here
+ DTreeBestSplitFinder( CvDTree* _tree, CvDTreeNode* _node);
+ DTreeBestSplitFinder( const DTreeBestSplitFinder& finder, Split ); // constructs a finder from an existing one
+ virtual void operator()(const BlockedRange& range); // processes the given range
+ void join( DTreeBestSplitFinder& rhs ); // combines this finder with rhs
+ Ptr<CvDTreeSplit> bestSplit;
+ Ptr<CvDTreeSplit> split;
+ int splitSize;
+ CvDTree* tree; // raw pointer, set to 0 by the default constructor
+ CvDTreeNode* node; // raw pointer, set to 0 by the default constructor
+ };
+
+ // Variant for CvForestTree: same data members, with its own operator() override.
+ struct ForestTreeBestSplitFinder : DTreeBestSplitFinder
+ {
+ ForestTreeBestSplitFinder() : DTreeBestSplitFinder() {}
+ ForestTreeBestSplitFinder( CvForestTree* _tree, CvDTreeNode* _node );
+ ForestTreeBestSplitFinder( const ForestTreeBestSplitFinder& finder, Split );
+ virtual void operator()(const BlockedRange& range);
+ };
+}
+
class CV_EXPORTS CvDTree : public CvStatModel
{
CvDTreeTrainData* get_data();
protected:
+ friend struct cv::DTreeBestSplitFinder;
virtual bool do_train( const CvMat* _subsample_idx );
virtual void split_node_data( CvDTreeNode* n );
virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
- virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi );
- virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
virtual double calc_node_dir( CvDTreeNode* node );
virtual void complete_node_dir( CvDTreeNode* node );
virtual void cluster_categories( const int* vectors, int vector_count,
/* dummy methods to avoid warnings: END */
protected:
+ friend struct cv::ForestTreeBestSplitFinder;
+
virtual CvDTreeSplit* find_best_split( CvDTreeNode* n );
CvRTrees* forest;
};
const CvDTreeParams& _params=CvDTreeParams(),
bool _shared=false, bool _add_labels=false,
bool _update_data=false );
- virtual int get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf,
- const float** ord_values, const int** missing );
- virtual void get_sample_indices( CvDTreeNode* n, int* indices_buf, const int** indices );
- virtual void get_cv_labels( CvDTreeNode* n, int* labels_buf, const int** labels );
- virtual int get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf, const int** cat_values );
- virtual void get_vectors( const CvMat* _subsample_idx,
- float* values, uchar* missing, float* responses, bool get_class_idx=false );
+ virtual void get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf,
+ const float** ord_values, const int** missing, int* sample_buf = 0 );
+ virtual const int* get_sample_indices( CvDTreeNode* n, int* indices_buf );
+ virtual const int* get_cv_labels( CvDTreeNode* n, int* labels_buf );
+ virtual const int* get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf );
+ virtual void get_vectors( const CvMat* _subsample_idx, float* values, uchar* missing,
+ float* responses, bool get_class_idx=false );
virtual CvDTreeNode* subsample_data( const CvMat* _subsample_idx );
const CvMat* missing_mask;
};
protected:
virtual double calc_node_dir( CvDTreeNode* node );
virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
- //virtual void complete_node_dir( CvDTreeNode* node );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual void split_node_data( CvDTreeNode* n );
};
protected:
virtual void try_split_node( CvDTreeNode* n );
- virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi );
- virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi );
+ virtual CvDTreeSplit* find_surrogate_split_ord( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
+ virtual CvDTreeSplit* find_surrogate_split_cat( CvDTreeNode* n, int vi, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_ord_class( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_cat_class( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_ord_reg( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual CvDTreeSplit* find_split_cat_reg( CvDTreeNode* n, int vi,
- float init_quality = 0, CvDTreeSplit* _split = 0 );
+ float init_quality = 0, CvDTreeSplit* _split = 0, uchar* ext_buf = 0 );
virtual void calc_node_value( CvDTreeNode* n );
virtual double calc_node_dir( CvDTreeNode* n );
ensemble = _ensemble;
data = _train_data;
data->shared = true;
-
return do_train( _subsample_idx );
}
// if the node has not been split,
// store the responses for the corresponding training samples
double* weak_eval = ensemble->get_weak_response()->data.db;
- int* labels_buf = data->get_cv_lables_buf();
- const int* labels = 0;
- data->get_cv_labels( node, labels_buf, &labels );
+ cv::AutoBuffer<int> inn_buf(node->sample_count);
+ const int* labels = data->get_cv_labels( node, (int*)inn_buf );
int i, count = node->sample_count;
double value = node->value;
if( data->get_var_type(vi) >= 0 ) // split on categorical var
{
- int* cat_labels_buf = data->get_pred_int_buf();
- const int* cat_labels = 0;
- data->get_cat_var_data( node, vi, cat_labels_buf, &cat_labels );
+ cv::AutoBuffer<int> inn_buf(n);
+ const int* cat_labels = data->get_cat_var_data( node, vi, (int*)inn_buf );
const int* subset = node->split->subset;
double sum = 0, sum_abs = 0;
}
else // split on ordered var
{
- float* values_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf(2*n*sizeof(int)+n*sizeof(float));
+ float* values_buf = (float*)(uchar*)inn_buf;
+ int* sorted_indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = sorted_indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
- const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
+ const int* sorted_indices = 0;
+ data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
int split_point = node->split->ord.split_point;
int n1 = node->get_num_valid(vi);
for( i = 0; i <= split_point; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = weights[idx];
dir[idx] = (char)-1;
L += w;
for( ; i < n1; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = weights[idx];
dir[idx] = (char)1;
R += w;
}
for( ; i < n; i++ )
- dir[indices[i]] = (char)0;
+ dir[sorted_indices[i]] = (char)0;
}
node->maxlr = MAX( L, R );
CvDTreeSplit*
-CvBoostTree::find_split_ord_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvBoostTree::find_split_ord_class( CvDTreeNode* node, int vi, float init_quality,
+ CvDTreeSplit* _split, uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
const double* weights = ensemble->get_subtree_weights()->data.db;
int n = node->sample_count;
int n1 = node->get_num_valid(vi);
- float* values_buf = data->get_pred_float_buf();
+
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(n*(3*sizeof(int)+sizeof(float)));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+ float* values_buf = (float*)ext_buf;
+ int* sorted_indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = sorted_indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
- const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels( node, responses_buf, &responses);
+ const int* sorted_indices = 0;
+ data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
+ int* responses_buf = sorted_indices_buf + n;
+ const int* responses = data->get_class_labels( node, responses_buf );
const double* rcw0 = weights + n;
double lcw[2] = {0,0}, rcw[2];
int i, best_i = -1;
rcw[0] = rcw0[0]; rcw[1] = rcw0[1];
for( i = n1; i < n; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = weights[idx];
rcw[responses[idx]] -= w;
}
for( i = 0; i < n1 - 1; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = weights[idx], w2 = w*w;
double lv, rv;
idx = responses[idx];
{
for( i = 0; i < n1 - 1; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = weights[idx];
idx = responses[idx];
lcw[idx] += w;
static CV_IMPLEMENT_QSORT_EX( icvSortDblPtr, double*, CV_CMP_NUM_PTR, int )
CvDTreeSplit*
-CvBoostTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvBoostTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
int ci = data->get_var_type(vi);
int n = node->sample_count;
int mi = data->cat_count->data.i[ci];
- int* cat_labels_buf = data->get_pred_int_buf();
- const int* cat_labels = 0;
- data->get_cat_var_data(node, vi, cat_labels_buf, &cat_labels);
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
+
+ int base_size = (2*mi+3)*sizeof(double) + mi*sizeof(double*);
+ cv::AutoBuffer<uchar> inn_buf((2*mi+3)*sizeof(double) + mi*sizeof(double*));
+ if( !_ext_buf)
+ inn_buf.allocate( base_size + 2*n*sizeof(int) );
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
+
+ int* cat_labels_buf = (int*)ext_buf;
+ const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);
+ int* responses_buf = cat_labels_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
double lcw[2]={0,0}, rcw[2]={0,0};
- double* cjk = (double*)cvStackAlloc(2*(mi+1)*sizeof(cjk[0]))+2;
+
+ double* cjk = (double*)cv::alignPtr(base_buf,sizeof(double))+2;
const double* weights = ensemble->get_subtree_weights()->data.db;
- double** dbl_ptr = (double**)cvStackAlloc( mi*sizeof(dbl_ptr[0]) );
+ double** dbl_ptr = (double**)(cjk + 2*mi);
int i, j, k, idx;
double L = 0, R;
double best_val = init_quality;
CvDTreeSplit*
-CvBoostTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvBoostTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
const double* weights = ensemble->get_subtree_weights()->data.db;
int n = node->sample_count;
int n1 = node->get_num_valid(vi);
- float* values_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(2*n*(sizeof(int)+sizeof(float)));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+
+ float* values_buf = (float*)ext_buf;
+ int* indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses(node, responses_buf, &responses);
+ data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices, sample_indices_buf );
+ float* responses_buf = (float*)(indices_buf + n);
+ const float* responses = data->get_ord_responses( node, responses_buf, sample_indices_buf );
int i, best_i = -1;
double L = 0, R = weights[n];
CvDTreeSplit*
-CvBoostTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvBoostTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
const double* weights = ensemble->get_subtree_weights()->data.db;
int ci = data->get_var_type(vi);
int n = node->sample_count;
int mi = data->cat_count->data.i[ci];
- int* cat_labels_buf = data->get_pred_int_buf();
- const int* cat_labels = 0;
- data->get_cat_var_data(node, vi, cat_labels_buf, &cat_labels);
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses(node, responses_buf, &responses);
-
- double* sum = (double*)cvStackAlloc( (mi+1)*sizeof(sum[0]) ) + 1;
- double* counts = (double*)cvStackAlloc( (mi+1)*sizeof(counts[0]) ) + 1;
- double** sum_ptr = (double**)cvStackAlloc( mi*sizeof(sum_ptr[0]) );
+ int base_size = (2*mi+3)*sizeof(double) + mi*sizeof(double*);
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + n*(2*sizeof(int) + sizeof(float)));
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
+
+ int* cat_labels_buf = (int*)ext_buf;
+ const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);
+ float* responses_buf = (float*)(cat_labels_buf + n);
+ int* sample_indices_buf = (int*)(responses_buf + n);
+ const float* responses = data->get_ord_responses(node, responses_buf, sample_indices_buf);
+
+ double* sum = (double*)cv::alignPtr(base_buf,sizeof(double)) + 1;
+ double* counts = sum + mi + 1;
+ double** sum_ptr = (double**)(counts + mi);
double L = 0, R = 0, best_val = init_quality, lsum = 0, rsum = 0;
int i, best_subset = -1, subset_i;
CvDTreeSplit*
-CvBoostTree::find_surrogate_split_ord( CvDTreeNode* node, int vi )
+CvBoostTree::find_surrogate_split_ord( CvDTreeNode* node, int vi, uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
- float* values_buf = data->get_pred_float_buf();
+ int n = node->sample_count;
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(n*(2*sizeof(int)+sizeof(float)));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+ float* values_buf = (float*)ext_buf;
+ int* indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
+ data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices, sample_indices_buf );
const double* weights = ensemble->get_subtree_weights()->data.db;
const char* dir = (char*)data->direction->data.ptr;
CvDTreeSplit*
-CvBoostTree::find_surrogate_split_cat( CvDTreeNode* node, int vi )
+CvBoostTree::find_surrogate_split_cat( CvDTreeNode* node, int vi, uchar* _ext_buf )
{
const char* dir = (char*)data->direction->data.ptr;
const double* weights = ensemble->get_subtree_weights()->data.db;
int n = node->sample_count;
- int* cat_labels_buf = data->get_pred_int_buf();
- const int* cat_labels = 0;
- data->get_cat_var_data(node, vi, cat_labels_buf, &cat_labels);
+ int i, mi = data->cat_count->data.i[data->get_var_type(vi)];
+
+ int base_size = (2*mi+3)*sizeof(double);
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + n*sizeof(int));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+ int* cat_labels_buf = (int*)ext_buf;
+ const int* cat_labels = data->get_cat_var_data(node, vi, cat_labels_buf);
// LL - number of samples that both the primary and the surrogate splits send to the left
// LR - ... primary split sends to the left and the surrogate split sends to the right
// RL - ... primary split sends to the right and the surrogate split sends to the left
// RR - ... both send to the right
CvDTreeSplit* split = data->new_split_cat( vi, 0 );
- int i, mi = data->cat_count->data.i[data->get_var_type(vi)];
double best_val = 0;
- double* lc = (double*)cvStackAlloc( (mi+1)*2*sizeof(lc[0]) ) + 1;
+ double* lc = (double*)cv::alignPtr(cat_labels_buf + n, sizeof(double)) + 1;
double* rc = lc + mi + 1;
for( i = -1; i < mi; i++ )
{
int i, n = node->sample_count;
const double* weights = ensemble->get_weights()->data.db;
- int* labels_buf = data->get_cv_lables_buf();
- const int* labels = 0;
- data->get_cv_labels(node, labels_buf, &labels);
+ cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int) + ( data->is_classifier ? sizeof(int) : sizeof(int) + sizeof(float))));
+ int* labels_buf = (int*)(uchar*)inn_buf;
+ const int* labels = data->get_cv_labels(node, labels_buf);
double* subtree_weights = ensemble->get_subtree_weights()->data.db;
double rcw[2] = {0,0};
int boost_type = ensemble->get_params().boost_type;
if( data->is_classifier )
{
- int* _responses_buf = data->get_resp_int_buf();
- const int* _responses = 0;
- data->get_class_labels(node, _responses_buf, &_responses);
+ int* _responses_buf = labels_buf + n;
+ const int* _responses = data->get_class_labels(node, _responses_buf);
int m = data->get_num_classes();
int* cls_count = data->counts->data.i;
for( int k = 0; k < m; k++ )
// n is the number of samples in the node.
// * node risk is the sum of squared errors: sum_i((Y_i - <node_value>)^2)
double sum = 0, sum2 = 0, iw;
- float* values_buf = data->get_resp_float_buf();
- const float* values = 0;
- data->get_ord_responses(node, values_buf, &values);
+ float* values_buf = (float*)(labels_buf + n);
+ int* sample_indices_buf = (int*)(values_buf + n);
+ const float* values = data->get_ord_responses(node, values_buf, sample_indices_buf);
for( i = 0; i < n; i++ )
{
float* fdata = 0;
int *sample_idx_buf;
const int* sample_idx = 0;
+ cv::AutoBuffer<uchar> inn_buf;
+ int _buf_size = (params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? data->sample_count*sizeof(int) : 0;
+ if( !tree )
+ _buf_size += n*sizeof(int);
+ else
+ {
+ if( have_subsample )
+ _buf_size += data->buf->step*(sizeof(float)+sizeof(uchar));
+ }
+ inn_buf.allocate(_buf_size);
+ uchar* cur_buf_pos = (uchar*)inn_buf;
+
if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
{
step = CV_IS_MAT_CONT(data->responses_copy->type) ?
1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
fdata = data->responses_copy->data.fl;
- sample_idx_buf = (int*)cvStackAlloc(data->sample_count*sizeof(sample_idx_buf[0]));
- data->get_sample_indices( data->data_root, sample_idx_buf, &sample_idx );
+ sample_idx_buf = (int*)cur_buf_pos;
+ cur_buf_pos = (uchar*)(sample_idx_buf + data->sample_count);
+ sample_idx = data->get_sample_indices( data->data_root, sample_idx_buf );
}
- CvMat* buf = data->buf;
+ CvMat* dtree_data_buf = data->buf;
if( !tree ) // before training the first tree, initialize weights and other parameters
{
- int n = data->sample_count;
- int* class_labels_buf = data->get_resp_int_buf();
- const int* class_labels = 0;
- data->get_class_labels(data->data_root, class_labels_buf, &class_labels);
+ int* class_labels_buf = (int*)cur_buf_pos;
+ cur_buf_pos = (uchar*)(class_labels_buf + n);
+ const int* class_labels = data->get_class_labels(data->data_root, class_labels_buf);
// in case of logitboost and gentle adaboost each weak tree is a regression tree,
// so we need to convert class labels to floating-point values
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses(data->data_root, responses_buf, &responses);
-
+
double w0 = 1./n;
double p[2] = { 1, 1 };
if (data->is_buf_16u)
{
- unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*buf->cols +
+ unsigned short* labels = (unsigned short*)(dtree_data_buf->data.s + data->data_root->buf_idx*dtree_data_buf->cols +
data->data_root->offset + (data->work_var_count-1)*data->sample_count);
for( i = 0; i < n; i++ )
{
}
else
{
- int* labels = buf->data.i + data->data_root->buf_idx*buf->cols +
+ int* labels = dtree_data_buf->data.i + data->data_root->buf_idx*dtree_data_buf->cols +
data->data_root->offset + (data->work_var_count-1)*data->sample_count;
for( i = 0; i < n; i++ )
// recent weak classifier we know the responses. For other samples we need to compute them
if( have_subsample )
{
- float* values0, *values = (float*)cvStackAlloc(data->buf->step*sizeof(float));
- uchar* missing0, *missing = (uchar*)cvStackAlloc(data->buf->step*sizeof(uchar));
+ float* values0, *values = (float*)cur_buf_pos;
+ cur_buf_pos = (uchar*)(values + data->buf->step);
+ uchar* missing0, *missing = cur_buf_pos;
+ cur_buf_pos = missing + data->buf->step;
CvMat _sample, _mask;
values0 = values;
missing0 = missing;
const double lb_weight_thresh = FLT_EPSILON;
const double lb_z_max = 10.;
- float* responses_buf = data->get_resp_float_buf();
+ /*float* responses_buf = data->get_resp_float_buf();
const float* responses = 0;
- data->get_ord_responses(data->data_root, responses_buf, &responses);
+ data->get_ord_responses(data->data_root, responses_buf, &responses);*/
/*if( weak->total == 7 )
putchar('*');*/
#include "_ml.h"
-#ifdef _OPENMP
-#include "omp.h"
-#endif
-
static const float ord_nan = FLT_MAX*0.5f;
static const int min_block_size = 1 << 16;
static const int block_size_delta = 1 << 10;
CV_CALL( direction = cvCreateMat( 1, sample_count, CV_8UC1 ));
CV_CALL( split_buf = cvCreateMat( 1, sample_count, CV_32SC1 ));
- {
- int maxNumThreads = 1;
-#ifdef _OPENMP
- maxNumThreads = omp_get_num_procs();
-#endif
- pred_float_buf.resize(maxNumThreads);
- pred_int_buf.resize(maxNumThreads);
- resp_float_buf.resize(maxNumThreads);
- resp_int_buf.resize(maxNumThreads);
- cv_lables_buf.resize(maxNumThreads);
- sample_idx_buf.resize(maxNumThreads);
- for( int ti = 0; ti < maxNumThreads; ti++ )
- {
- pred_float_buf[ti].resize(sample_count);
- pred_int_buf[ti].resize(sample_count);
- resp_float_buf[ti].resize(sample_count);
- resp_int_buf[ti].resize(sample_count);
- cv_lables_buf[ti].resize(sample_count);
- sample_idx_buf[ti].resize(sample_count);
- }
- }
-
__END__;
if( data )
cvReleaseMat( &tmp_map );
}
-int CvERTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf, const float** ord_values, const int** missing )
+void CvERTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* missing_buf,
+ const float** ord_values, const int** missing, int* sample_indices_buf )
{
int vidx = var_idx ? var_idx->data.i[vi] : vi;
int node_sample_count = n->sample_count;
- int* sample_indices_buf = get_sample_idx_buf();
- const int* sample_indices = 0;
-
- get_sample_indices(n, sample_indices_buf, &sample_indices);
+ // may use missing_buf as buffer for sample indices!
+ const int* sample_indices = get_sample_indices(n, sample_indices_buf ? sample_indices_buf : missing_buf);
int td_step = train_data->step/CV_ELEM_SIZE(train_data->type);
int m_step = missing_mask ? missing_mask->step/CV_ELEM_SIZE(missing_mask->type) : 1;
}
*ord_values = ord_values_buf;
*missing = missing_buf;
- return 0; //TODO: return the number of non-missing values
}
// Returns the sample indices stored for node n.
// The indices are read through get_cat_var_data() from the slot at position
// var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0).
// indices_buf is forwarded unchanged as the scratch buffer; the returned
// pointer is whatever get_cat_var_data() hands back.
// This change replaces the old out-parameter (indices) with a return value.
-void CvERTreeTrainData::get_sample_indices( CvDTreeNode* n, int* indices_buf, const int** indices )
+const int* CvERTreeTrainData::get_sample_indices( CvDTreeNode* n, int* indices_buf )
{
- get_cat_var_data( n, var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0), indices_buf, indices );
+ return get_cat_var_data( n, var_count + (is_classifier ? 1 : 0) + (have_labels ? 1 : 0), indices_buf );
}
// Returns the "cv" labels of node n (presumably cross-validation fold labels --
// confirm against the training-data layout).
// When have_labels is set, the labels are read through get_cat_var_data() from
// the slot at position var_count + (is_classifier ? 1 : 0), with labels_buf as
// the scratch buffer.
// When have_labels is not set, the new form returns 0 (null); the old form left
// the out-parameter untouched. Callers must not dereference the result blindly.
-void CvERTreeTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf, const int** labels )
+const int* CvERTreeTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
if (have_labels)
- get_cat_var_data( n, var_count + (is_classifier ? 1 : 0), labels_buf, labels );
+ return get_cat_var_data( n, var_count + (is_classifier ? 1 : 0), labels_buf );
+ return 0;
}
// Returns node n's values for variable vi.
// vi is first mapped through get_var_type() to ci, and ci (not vi) is used to
// address the slot ci*sample_count + n->offset inside row n->buf_idx of buf.
//   - 32-bit storage (!is_buf_16u): the result points directly into buf and
//     cat_values_buf is not touched.
//   - 16-bit storage: n->sample_count unsigned short values are widened into
//     cat_values_buf, which is then returned; the caller must provide room for
//     at least n->sample_count ints.
// The old form reported the pointer through an out-parameter and always
// returned 0; the new form returns the pointer itself.
-int CvERTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf, const int** cat_values )
+const int* CvERTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf )
{
int ci = get_var_type( vi);
+ const int* cat_values = 0;
if( !is_buf_16u )
- *cat_values = buf->data.i + n->buf_idx*buf->cols +
- ci*sample_count + n->offset;
+ cat_values = buf->data.i + n->buf_idx*buf->cols + ci*sample_count + n->offset;
else {
// 16-bit storage: copy element by element so the caller always sees ints.
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
ci*sample_count + n->offset);
for( int i = 0; i < n->sample_count; i++ )
cat_values_buf[i] = short_values[i];
- *cat_values = cat_values_buf;
+ cat_values = cat_values_buf;
}
-
- return 0; //TODO: return the number of non-missing values
+ return cat_values;
}
void CvERTreeTrainData::get_vectors( const CvMat* _subsample_idx,
CvMat* subsample_idx = 0;
CvMat* subsample_co = 0;
+ cv::AutoBuffer<uchar> inn_buf(sample_count*(sizeof(float) + sizeof(int)));
+
CV_FUNCNAME( "CvERTreeTrainData::get_vectors" );
__BEGIN__;
{
float* dst = values + vi;
uchar* m = missing ? missing + vi : 0;
- int* src_buf = get_pred_int_buf();
- const int* src = 0;
- get_cat_var_data(data_root, vi, src_buf, &src);
+ int* lbls_buf = (int*)(uchar*)inn_buf;
+ const int* src = get_cat_var_data(data_root, vi, lbls_buf);
for( i = 0; i < count; i++, dst += var_count )
{
}
else // ordered
{
- float* dst_buf = values + vi;
- int* m_buf = get_pred_int_buf();
+ int* mis_buf = (int*)(uchar*)inn_buf;
const float *dst = 0;
- const int* m = 0;
- get_ord_var_data(data_root, vi, dst_buf, m_buf, &dst, &m);
+ const int* mis = 0;
+ get_ord_var_data(data_root, vi, values + vi, mis_buf, &dst, &mis, 0);
for (int si = 0; si < total; si++)
- *(missing + vi + si) = m[si] == 0 ? 0 : 1;
+ *(missing + vi + si) = mis[si] == 0 ? 0 : 1;
}
}
{
if( is_classifier )
{
- int* src_buf = get_resp_int_buf();
- const int* src = 0;
- get_class_labels(data_root, src_buf, &src);
+ int* lbls_buf = (int*)(uchar*)inn_buf;
+ const int* src = get_class_labels(data_root, lbls_buf);
for( i = 0; i < count; i++ )
{
int idx = sidx ? sidx[i] : i;
}
else
{
- float *_values_buf = get_resp_float_buf();
- const float* _values = 0;
- get_ord_responses(data_root, _values_buf, &_values);
+ float* _values_buf = (float*)(uchar*)inn_buf;
+ int* sample_idx_buf = (int*)(_values_buf + sample_count);
+ const float* _values = get_ord_responses(data_root, _values_buf, sample_idx_buf);
for( i = 0; i < count; i++ )
{
int idx = sidx ? sidx[i] : i;
if( data->get_var_type(vi) >= 0 ) // split on categorical var
{
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
+ cv::AutoBuffer<uchar> inn_buf(n*sizeof(int)*(!data->have_priors ? 1 : 2));
+ int* labels_buf = (int*)(uchar*)inn_buf;
+ const int* labels = data->get_cat_var_data( node, vi, labels_buf );
const int* subset = node->split->subset;
- data->get_cat_var_data( node, vi, labels_buf, &labels );
if( !data->have_priors )
{
int sum = 0, sum_abs = 0;
{
const double* priors = data->priors_mult->data.db;
double sum = 0, sum_abs = 0;
- int *responses_buf = data->get_resp_int_buf();
- const int* responses;
- data->get_class_labels(node, responses_buf, &responses);
+ int *responses_buf = labels_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
for( i = 0; i < n; i++ )
{
else // split on ordered var
{
float split_val = node->split->ord.c;
- float* val_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int)*(!data->have_priors ? 1 : 2) + sizeof(float)));
+ float* val_buf = (float*)(uchar*)inn_buf;
+ int* missing_buf = (int*)(val_buf + n);
const float* val = 0;
- int* missing_buf = data->get_pred_int_buf();
const int* missing = 0;
- data->get_ord_var_data( node, vi, val_buf, missing_buf, &val, &missing );
+ data->get_ord_var_data( node, vi, val_buf, missing_buf, &val, &missing, 0 );
if( !data->have_priors )
{
else
{
const double* priors = data->priors_mult->data.db;
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
+ int* responses_buf = missing_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
L = R = 0;
for( i = 0; i < n; i++ )
{
return node->split->quality/(L + R);
}
-CvDTreeSplit* CvForestERTree::find_split_ord_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvForestERTree::find_split_ord_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split,
+ uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
const float split_delta = (1 + FLT_EPSILON) * FLT_EPSILON;
- int n = node->sample_count;
+ int n = node->sample_count, i;
int m = data->get_num_classes();
- float* values_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(n*(2*sizeof(int) + sizeof(float)));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+ float* values_buf = (float*)ext_buf;
+ int* missing_buf = (int*)(values_buf + n);
const float* values = 0;
- int* missing_buf = data->get_pred_int_buf();
const int* missing = 0;
- data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing );
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels( node, responses_buf, &responses );
+ data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing, 0 );
+ int* responses_buf = missing_buf + n;
+ const int* responses = data->get_class_labels( node, responses_buf );
double lbest_val = 0, rbest_val = 0, best_val = init_quality, split_val = 0;
-
- int i;
-
const double* priors = data->have_priors ? data->priors_mult->data.db : 0;
-
bool is_find_split = false;
-
float pmin, pmax;
int smpi = 0;
while ( missing[smpi] && (smpi < n) )
return split;
}
-CvDTreeSplit* CvForestERTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvForestERTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split,
+ uchar* _ext_buf )
{
int ci = data->get_var_type(vi);
int n = node->sample_count;
if ( vm > 1 )
{
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
- data->get_cat_var_data( node, vi, labels_buf, &labels );
+ cv::AutoBuffer<int> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(2*n);
+ int* ext_buf = _ext_buf ? (int*)_ext_buf : (int*)inn_buf;
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels( node, responses_buf, &responses );
+ const int* labels = data->get_cat_var_data( node, vi, ext_buf );
+ const int* responses = data->get_class_labels( node, ext_buf + n );
const double* priors = data->have_priors ? data->priors_mult->data.db : 0;
return split;
}
-CvDTreeSplit* CvForestERTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvForestERTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split,
+ uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
const float split_delta = (1 + FLT_EPSILON) * FLT_EPSILON;
int n = node->sample_count;
- float* values_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(n*(2*sizeof(int) + 2*sizeof(float)));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
+ float* values_buf = (float*)ext_buf;
+ int* missing_buf = (int*)(values_buf + n);
const float* values = 0;
- int* missing_buf = data->get_pred_int_buf();
const int* missing = 0;
- data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing );
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses( node, responses_buf, &responses );
+ data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing, 0 );
+ float* responses_buf = (float*)(missing_buf + n);
+ int* sample_indices_buf = (int*)(responses_buf + n);
+ const float* responses = data->get_ord_responses( node, responses_buf, sample_indices_buf );
double best_val = init_quality, split_val = 0, lsum = 0, rsum = 0;
int L = 0, R = 0;
return split;
}
-CvDTreeSplit* CvForestERTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvForestERTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split,
+ uchar* _ext_buf )
{
int ci = data->get_var_type(vi);
int n = node->sample_count;
if ( vm > 1 )
{
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
- data->get_cat_var_data( node, vi, labels_buf, &labels );
-
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses( node, responses_buf, &responses );
+ int base_size = vm*sizeof(int);
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + n*(2*sizeof(int) + sizeof(float)));
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
+ int* labels_buf = (int*)ext_buf;
+ const int* labels = data->get_cat_var_data( node, vi, labels_buf );
+ float* responses_buf = (float*)(labels_buf + n);
+ int* sample_indices_buf = (int*)(responses_buf + n);
+ const float* responses = data->get_ord_responses( node, responses_buf, sample_indices_buf );
// create random class mask
- int *valid_cidx = (int*)cvStackAlloc(vm*sizeof(valid_cidx[0]));
+ int *valid_cidx = (int*)base_buf;
for (int i = 0; i < vm; i++)
{
valid_cidx[i] = -1;
return split;
}
-//void CvForestERTree::complete_node_dir( CvDTreeNode* node )
-//{
-// int vi, i, n = node->sample_count, nl, nr, d0 = 0, d1 = -1;
-// int nz = n - node->get_num_valid(node->split->var_idx);
-// char* dir = (char*)data->direction->data.ptr;
-//
-// // try to complete direction using surrogate splits
-// if( nz && data->params.use_surrogates )
-// {
-// CvDTreeSplit* split = node->split->next;
-// for( ; split != 0 && nz; split = split->next )
-// {
-// int inversed_mask = split->inversed ? -1 : 0;
-// vi = split->var_idx;
-//
-// if( data->get_var_type(vi) >= 0 ) // split on categorical var
-// {
-// int* labels_buf = data->pred_int_buf;
-// const int* labels = 0;
-// data->get_cat_var_data(node, vi, labels_buf, &labels);
-// const int* subset = split->subset;
-//
-// for( i = 0; i < n; i++ )
-// {
-// int idx = labels[i];
-// if( !dir[i] && ( ((idx >= 0)&&(!data->is_buf_16u)) || ((idx != 65535)&&(data->is_buf_16u)) ))
-//
-// {
-// int d = CV_DTREE_CAT_DIR(idx,subset);
-// dir[i] = (char)((d ^ inversed_mask) - inversed_mask);
-// if( --nz )
-// break;
-// }
-// }
-// }
-// else // split on ordered var
-// {
-// float* values_buf = data->pred_float_buf;
-// const float* values = 0;
-// uchar* missing_buf = (uchar*)data->pred_int_buf;
-// const uchar* missing = 0;
-// data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing );
-// float split_val = node->split->ord.c;
-//
-// for( i = 0; i < n; i++ )
-// {
-// if( !dir[i] && !missing[i])
-// {
-// int d = values[i] <= split_val ? -1 : 1;
-// dir[i] = (char)((d ^ inversed_mask) - inversed_mask);
-// if( --nz )
-// break;
-// }
-// }
-// }
-// }
-// }
-//
-// // find the default direction for the rest
-// if( nz )
-// {
-// for( i = nr = 0; i < n; i++ )
-// nr += dir[i] > 0;
-// nl = n - nr - nz;
-// d0 = nl > nr ? -1 : nr > nl;
-// }
-//
-// // make sure that every sample is directed either to the left or to the right
-// for( i = 0; i < n; i++ )
-// {
-// int d = dir[i];
-// if( !d )
-// {
-// d = d0;
-// if( !d )
-// d = d1, d1 = -d1;
-// }
-// d = d > 0;
-// dir[i] = (char)d; // remap (-1,1) to (0,1)
-// }
-//}
-
void CvForestERTree::split_node_data( CvDTreeNode* node )
{
int vi, i, n = node->sample_count, nl, nr, scount = data->sample_count;
(node->left->sample_count > data->params.min_sample_count ||
node->right->sample_count > data->params.min_sample_count);
+ cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int)+sizeof(float)));
// split ordered vars
for( vi = 0; vi < data->var_count; vi++ )
{
if (ci >= 0) continue;
int n1 = node->get_num_valid(vi), nr1 = 0;
-
- float* values_buf = data->get_pred_float_buf();
+ float* values_buf = (float*)(uchar*)inn_buf;
+ int* missing_buf = (int*)(values_buf + n);
const float* values = 0;
- int* missing_buf = data->get_pred_int_buf();
const int* missing = 0;
- data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing );
+ data->get_ord_var_data( node, vi, values_buf, missing_buf, &values, &missing, 0 );
for( i = 0; i < n; i++ )
nr1 += (!missing[i] & dir[i]);
if (ci < 0) continue;
int n1 = node->get_num_valid(vi), nr1 = 0;
-
- int *src_lbls_buf = data->get_pred_int_buf();
- const int* src_lbls = 0;
- data->get_cat_var_data(node, vi, src_lbls_buf, &src_lbls);
+ const int* src_lbls = data->get_cat_var_data(node, vi, (int*)(uchar*)inn_buf);
for(i = 0; i < n; i++)
temp_buf[i] = src_lbls[i];
}
}
-
// split sample indices
- int *sample_idx_src_buf = data->get_sample_idx_buf();
+ int *sample_idx_src_buf = (int*)(uchar*)inn_buf;
const int* sample_idx_src = 0;
if (split_input_data)
{
- data->get_sample_indices(node, sample_idx_src_buf, &sample_idx_src);
+ sample_idx_src = data->get_sample_indices(node, sample_idx_src_buf);
for(i = 0; i < n; i++)
temp_buf[i] = sample_idx_src[i];
\r
#include "_ml.h"\r
\r
-#ifdef _OPENMP
-#include "omp.h"
-#endif\r
-\r
CvForestTree::CvForestTree()\r
{\r
forest = NULL;\r
}\r
\r
\r
-CvDTreeSplit* CvForestTree::find_best_split( CvDTreeNode* node )\r
-{\r
- int vi;\r
\r
- CvDTreeSplit *best_split = 0;\r
+namespace cv\r
+{\r
\r
- CvMat* active_var_mask = 0;\r
- if( forest )\r
- {\r
- int var_count;\r
- CvRNG* rng = forest->get_rng();\r
+ForestTreeBestSplitFinder::ForestTreeBestSplitFinder( CvForestTree* _tree, CvDTreeNode* _node ) :\r
+ DTreeBestSplitFinder(_tree, _node) {}\r
\r
- active_var_mask = forest->get_active_var_mask();\r
- var_count = active_var_mask->cols;\r
+ForestTreeBestSplitFinder::ForestTreeBestSplitFinder( const ForestTreeBestSplitFinder& finder, Split spl ) :\r
+ DTreeBestSplitFinder( finder, spl ) {}\r
\r
- CV_Assert( var_count == data->var_count );\r
+void ForestTreeBestSplitFinder::operator()(const BlockedRange& range)\r
+{\r
+ int vi, vi1 = range.begin(), vi2 = range.end();\r
+ int n = node->sample_count;\r
+ CvDTreeTrainData* data = tree->get_data();\r
+ AutoBuffer<uchar> inn_buf(2*n*(sizeof(int) + sizeof(float)));\r
\r
- for( vi = 0; vi < var_count; vi++ )\r
- {\r
- uchar temp;\r
- int i1 = cvRandInt(rng) % var_count;\r
- int i2 = cvRandInt(rng) % var_count;\r
- CV_SWAP( active_var_mask->data.ptr[i1],\r
- active_var_mask->data.ptr[i2], temp );\r
- }\r
- }\r
- int maxNumThreads = 1;\r
-#ifdef _OPENMP\r
- maxNumThreads = omp_get_num_procs();\r
-#endif\r
- std::vector<CvDTreeSplit*> splits(maxNumThreads);\r
- std::vector<CvDTreeSplit*> bestSplits(maxNumThreads);\r
- std::vector<int> canSplit(maxNumThreads);\r
- CvDTreeSplit **splitsPtr = &splits[0], ** bestSplitsPtr = &bestSplits[0];\r
- int* canSplitPtr = &canSplit[0];\r
- for (int i = 0; i < maxNumThreads; i++)\r
- {\r
- splits[i] = data->new_split_cat( 0, -1.0f );\r
- bestSplits[i] = data->new_split_cat( 0, -1.0f );\r
- canSplitPtr[i] = 0;\r
- }\r
+ CvForestTree* ftree = (CvForestTree*)tree;\r
+ const CvMat* active_var_mask = ftree->forest->get_active_var_mask();\r
\r
-#ifdef _OPENMP\r
-#pragma omp parallel for num_threads(maxNumThreads) schedule(dynamic)\r
-#endif\r
- for( vi = 0; vi < data->var_count; vi++ )\r
+ for( vi = vi1; vi < vi2; vi++ )\r
{\r
- CvDTreeSplit *res, *t;\r
- int threadIdx = 0;\r
-#ifdef _OPENMP\r
- threadIdx = omp_get_thread_num();\r
-#endif\r
+ CvDTreeSplit *res;\r
int ci = data->var_type->data.i[vi];\r
if( node->num_valid[vi] <= 1\r
|| (active_var_mask && !active_var_mask->data.ptr[vi]) )\r
if( data->is_classifier )\r
{\r
if( ci >= 0 )\r
- res = find_split_cat_class( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );\r
+ res = ftree->find_split_cat_class( node, vi, bestSplit->quality, split, (uchar*)inn_buf );\r
else\r
- res = find_split_ord_class( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );\r
+ res = ftree->find_split_ord_class( node, vi, bestSplit->quality, split, (uchar*)inn_buf );\r
}\r
else\r
{\r
if( ci >= 0 )\r
- res = find_split_cat_reg( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );\r
+ res = ftree->find_split_cat_reg( node, vi, bestSplit->quality, split, (uchar*)inn_buf );\r
else\r
- res = find_split_ord_reg( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );\r
+ res = ftree->find_split_ord_reg( node, vi, bestSplit->quality, split, (uchar*)inn_buf );\r
}\r
\r
- if( res )\r
- {\r
- canSplitPtr[threadIdx] = 1;\r
- if( bestSplits[threadIdx]->quality < splits[threadIdx]->quality )\r
- CV_SWAP( bestSplits[threadIdx], splits[threadIdx], t );\r
- }\r
+ if( res && bestSplit->quality < split->quality )\r
+ memcpy( (CvDTreeSplit*)bestSplit, (CvDTreeSplit*)split, splitSize );\r
}\r
- int ti = 0;\r
- for( ; ti < maxNumThreads; ti++ )\r
+}\r
+}\r
+\r
+CvDTreeSplit* CvForestTree::find_best_split( CvDTreeNode* node )\r
+{\r
+ CvMat* active_var_mask = 0;\r
+ if( forest )\r
{\r
- if( canSplitPtr[ti] )\r
+ int var_count;\r
+ CvRNG* rng = forest->get_rng();\r
+\r
+ active_var_mask = forest->get_active_var_mask();\r
+ var_count = active_var_mask->cols;\r
+\r
+ CV_Assert( var_count == data->var_count );\r
+\r
+ for( int vi = 0; vi < var_count; vi++ )\r
{\r
- best_split = bestSplitsPtr[ti];\r
- break;\r
+ uchar temp;\r
+ int i1 = cvRandInt(rng) % var_count;\r
+ int i2 = cvRandInt(rng) % var_count;\r
+ CV_SWAP( active_var_mask->data.ptr[i1],\r
+ active_var_mask->data.ptr[i2], temp );\r
}\r
}\r
- for( ; ti < maxNumThreads; ti++ )\r
- {\r
- if( best_split->quality < bestSplitsPtr[ti]->quality )\r
- best_split = bestSplitsPtr[ti];\r
- }\r
- for(int i = 0; i < maxNumThreads; i++)\r
- {\r
- cvSetRemoveByPtr( data->split_heap, splits[i] );\r
- if( bestSplits[i] != best_split )\r
- cvSetRemoveByPtr( data->split_heap, bestSplits[i] );\r
- }\r
- return best_split;\r
-}\r
\r
+ cv::ForestTreeBestSplitFinder finder( this, node );\r
+\r
+ cv::parallel_reduce(cv::BlockedRange(0, data->var_count), finder);\r
+\r
+ CvDTreeSplit *bestSplit = data->new_split_cat( 0, -1.0f );\r
+ memcpy( bestSplit, finder.bestSplit, finder.splitSize );\r
+\r
+ return bestSplit;\r
+}\r
\r
void CvForestTree::read( CvFileStorage* fs, CvFileNode* fnode, CvRTrees* _forest, CvDTreeTrainData* _data )\r
{\r
#include "_ml.h"
#include <ctype.h>
-#ifdef _OPENMP
-#include "omp.h"
-#endif
-
using namespace cv;
static const float ord_nan = FLT_MAX*0.5f;
CV_CALL( direction = cvCreateMat( 1, sample_count, CV_8UC1 ));
CV_CALL( split_buf = cvCreateMat( 1, sample_count, CV_32SC1 ));
- {
- int maxNumThreads = 1;
-#ifdef _OPENMP
- maxNumThreads = omp_get_num_procs();
-#endif
- pred_float_buf.resize(maxNumThreads);
- pred_int_buf.resize(maxNumThreads);
- resp_float_buf.resize(maxNumThreads);
- resp_int_buf.resize(maxNumThreads);
- cv_lables_buf.resize(maxNumThreads);
- sample_idx_buf.resize(maxNumThreads);
- for( int ti = 0; ti < maxNumThreads; ti++ )
- {
- pred_float_buf[ti].resize(sample_count);
- pred_int_buf[ti].resize(sample_count);
- resp_float_buf[ti].resize(sample_count);
- resp_int_buf[ti].resize(sample_count);
- cv_lables_buf[ti].resize(sample_count);
- sample_idx_buf[ti].resize(sample_count);
- }
- }
-
__END__;
if( data )
co[i*2+1] = -1;
}
+ cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
for( vi = 0; vi < work_var_count; vi++ )
{
int ci = get_var_type(vi);
if( ci >= 0 || vi >= var_count )
{
- int* src_buf = get_pred_int_buf();
- const int* src = 0;
int num_valid = 0;
-
- get_cat_var_data( data_root, vi, src_buf, &src );
+ const int* src = get_cat_var_data( data_root, vi, (int*)(uchar*)inn_buf );
if (is_buf_16u)
{
}
else
{
- int *src_idx_buf = get_pred_int_buf();
+ int *src_idx_buf = (int*)(uchar*)inn_buf;
+ float *src_val_buf = (float*)(src_idx_buf + sample_count);
+ int* sample_indices_buf = (int*)(src_val_buf + sample_count);
const int* src_idx = 0;
- float *src_val_buf = get_pred_float_buf();
const float* src_val = 0;
+ get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );
int j = 0, idx, count_i;
int num_valid = data_root->get_num_valid(vi);
- get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx );
if (is_buf_16u)
{
unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
}
}
// sample indices subsampling
- int* sample_idx_src_buf = get_sample_idx_buf();
- const int* sample_idx_src = 0;
- get_sample_indices(data_root, sample_idx_src_buf, &sample_idx_src);
+ const int* sample_idx_src = get_sample_indices(data_root, (int*)(uchar*)inn_buf);
if (is_buf_16u)
{
unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*buf->cols +
int* sidx = 0;
int* co = 0;
+ cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
if( _subsample_idx )
{
CV_CALL( subsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count ));
{
float* dst = values + vi;
uchar* m = missing ? missing + vi : 0;
- int* src_buf = get_pred_int_buf();
- const int* src = 0;
- get_cat_var_data(data_root, vi, src_buf, &src);
+ const int* src = get_cat_var_data(data_root, vi, (int*)(uchar*)inn_buf);
for( i = 0; i < count; i++, dst += var_count )
{
float* dst = values + vi;
uchar* m = missing ? missing + vi : 0;
int count1 = data_root->get_num_valid(vi);
- float *src_val_buf = get_pred_float_buf();
+ float *src_val_buf = (float*)(uchar*)inn_buf;
+ int* src_idx_buf = (int*)(src_val_buf + sample_count);
+ int* sample_indices_buf = src_idx_buf + sample_count;
const float *src_val = 0;
- int* src_idx_buf = get_pred_int_buf();
const int* src_idx = 0;
- get_ord_var_data(data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx);
+ get_ord_var_data(data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf);
for( i = 0; i < count1; i++ )
{
{
if( is_classifier )
{
- int* src_buf = get_resp_int_buf();
- const int* src = 0;
- get_class_labels(data_root, src_buf, &src);
+ const int* src = get_class_labels(data_root, (int*)(uchar*)inn_buf);
for( i = 0; i < count; i++ )
{
int idx = sidx ? sidx[i] : i;
}
else
{
- float *_values_buf = get_resp_float_buf();
- const float* _values = 0;
- get_ord_responses(data_root, _values_buf, &_values);
+ float* val_buf = (float*)(uchar*)inn_buf;
+ int* sample_idx_buf = (int*)(val_buf + sample_count);
+ const float* _values = get_ord_responses(data_root, val_buf, sample_idx_buf);
for( i = 0; i < count; i++ )
{
int idx = sidx ? sidx[i] : i;
cvReleaseMat( &split_buf );
cvReleaseMemStorage( &temp_storage );
cvReleaseMat( &responses_copy );
- pred_float_buf.clear();
- pred_int_buf.clear();
- resp_float_buf.clear();
- resp_int_buf.clear();
- cv_lables_buf.clear();
- sample_idx_buf.clear();
-
cv_heap = nv_heap = 0;
}
return var_type->data.i[vi];
}
// Collects the values of ordered variable vi for node n.
//
// Outputs:
//   *sorted_indices - the node's index array for vi, taken from the slot
//                     vi*sample_count + n->offset in row n->buf_idx of buf.
//                     With 32-bit storage it points directly into buf; with
//                     16-bit storage (is_buf_16u) the values are widened into
//                     sorted_indices_buf and that buffer is returned.
//   *ord_values     - always set to ord_values_buf, filled as described below.
//
// sample_indices_buf is the scratch buffer handed to get_sample_indices(); this
// change makes the caller supply it instead of taking it from a shared buffer
// (the removed get_sample_idx_buf() call). The return type also changes from
// int (always 0) to void.
// NOTE(review): sample_indices_buf is forwarded as-is. With 16-bit storage
// get_cat_var_data() writes n->sample_count ints into it, so it presumably must
// be non-null in that case -- confirm for callers that pass 0.
-int CvDTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* indices_buf, const float** ord_values, const int** indices )
+void CvDTreeTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ord_values_buf, int* sorted_indices_buf,
+ const float** ord_values, const int** sorted_indices, int* sample_indices_buf )
{
// vidx is the column/row of vi inside train_data (remapped through var_idx when present).
int vidx = var_idx ? var_idx->data.i[vi] : vi;
int node_sample_count = n->sample_count;
- int* sample_indices_buf = get_sample_idx_buf();
- const int* sample_indices = 0;
int td_step = train_data->step/CV_ELEM_SIZE(train_data->type);
- get_sample_indices(n, sample_indices_buf, &sample_indices);
+ const int* sample_indices = get_sample_indices(n, sample_indices_buf);
if( !is_buf_16u )
- *indices = buf->data.i + n->buf_idx*buf->cols +
+ *sorted_indices = buf->data.i + n->buf_idx*buf->cols +
vi*sample_count + n->offset;
else {
const unsigned short* short_indices = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
vi*sample_count + n->offset );
for( int i = 0; i < node_sample_count; i++ )
- indices_buf[i] = short_indices[i];
- *indices = indices_buf;
+ sorted_indices_buf[i] = short_indices[i];
+ *sorted_indices = sorted_indices_buf;
}
// Both loops below stop at the first index that is negative (32-bit storage)
// or equal to 65535 (16-bit storage); entries of ord_values_buf from that
// position onward are left unwritten. Each kept index is translated through
// sample_indices before train_data is read.
if( tflag == CV_ROW_SAMPLE )
{
// Row-sample layout: element (sample idx, variable vidx).
for( int i = 0; i < node_sample_count &&
- ((((*indices)[i] >= 0) && !is_buf_16u) || (((*indices)[i] != 65535) && is_buf_16u)); i++ )
+ ((((*sorted_indices)[i] >= 0) && !is_buf_16u) || (((*sorted_indices)[i] != 65535) && is_buf_16u)); i++ )
{
- int idx = (*indices)[i];
+ int idx = (*sorted_indices)[i];
idx = sample_indices[idx];
ord_values_buf[i] = *(train_data->data.fl + idx * td_step + vidx);
}
}
else
// Other layout: element (variable vidx, sample idx).
for( int i = 0; i < node_sample_count &&
- ((((*indices)[i] >= 0) && !is_buf_16u) || (((*indices)[i] != 65535) && is_buf_16u)); i++ )
+ ((((*sorted_indices)[i] >= 0) && !is_buf_16u) || (((*sorted_indices)[i] != 65535) && is_buf_16u)); i++ )
{
- int idx = (*indices)[i];
+ int idx = (*sorted_indices)[i];
idx = sample_indices[idx];
ord_values_buf[i] = *(train_data->data.fl + vidx* td_step + idx);
}
*ord_values = ord_values_buf;
- return 0; //TODO: return the number of non-missing values
}
// Returns the class labels of node n.
// For a classifier the labels are read through get_cat_var_data() from slot
// var_count, with labels_buf as the scratch buffer.
// When is_classifier is not set, the new form returns 0 (null); the old form
// left the out-parameter untouched.
-void CvDTreeTrainData::get_class_labels( CvDTreeNode* n, int* labels_buf, const int** labels )
+const int* CvDTreeTrainData::get_class_labels( CvDTreeNode* n, int* labels_buf )
{
if (is_classifier)
- get_cat_var_data( n, var_count, labels_buf, labels );
+ return get_cat_var_data( n, var_count, labels_buf);
+ return 0;
}
// Returns the sample indices stored for node n.
// They are read through get_cat_var_data() from slot get_work_var_count(),
// with indices_buf as the scratch buffer. The result is now returned directly
// instead of being written through an out-parameter.
-void CvDTreeTrainData::get_sample_indices( CvDTreeNode* n, int* indices_buf, const int** indices )
+const int* CvDTreeTrainData::get_sample_indices( CvDTreeNode* n, int* indices_buf )
{
- get_cat_var_data( n, get_work_var_count(), indices_buf, indices );
+ return get_cat_var_data( n, get_work_var_count(), indices_buf );
}
// Copies the float responses of the samples in node n into values_buf and
// returns values_buf.
//   n                  - node whose samples are read (n->sample_count entries)
//   values_buf         - output storage for the responses
//   sample_indices_buf - scratch storage handed to get_sample_indices(); it
//                        replaces the removed get_sample_idx_buf() buffer
// The copy stops at the first index that is negative (int buffer) or equal to
// 65535 (16-bit buffer).
-void CvDTreeTrainData::get_ord_responses( CvDTreeNode* n, float* values_buf, const float** values)
+const float* CvDTreeTrainData::get_ord_responses( CvDTreeNode* n, float* values_buf, int*sample_indices_buf )
{
int sample_count = n->sample_count;
- int* indices_buf = get_sample_idx_buf();
- const int* indices = 0;
-
// element stride of the responses matrix (1 when it is continuous)
int r_step = CV_IS_MAT_CONT(responses->type) ? 1 : responses->step/CV_ELEM_SIZE(responses->type);
+ const int* indices = get_sample_indices(n, sample_indices_buf);
- get_sample_indices(n, indices_buf, &indices);
-
-
for( int i = 0; i < sample_count &&
(((indices[i] >= 0) && !is_buf_16u) || ((indices[i] != 65535) && is_buf_16u)); i++ )
{
// idx was used below without a declaration; it is the i-th sample index
int idx = indices[i];
values_buf[i] = *(responses->data.fl + idx * r_step);
}
- *values = values_buf;
+ return values_buf;
}
// Returns the cross-validation label array of node n, or 0 when have_labels
// is false. The labels are read as variable slot get_work_var_count()-1 via
// get_cat_var_data(); labels_buf is forwarded as caller-provided scratch.
-void CvDTreeTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf, const int** labels )
+const int* CvDTreeTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
if (have_labels)
- get_cat_var_data( n, get_work_var_count()- 1, labels_buf, labels );
+ return get_cat_var_data( n, get_work_var_count()- 1, labels_buf);
// no cv labels were stored
+ return 0;
}
// Returns the n->sample_count integer values stored for variable slot vi of
// node n.
//   - int buffer (!is_buf_16u): the returned pointer aims directly into buf
//     and cat_values_buf is not written.
//   - 16-bit buffer: the unsigned short values are widened into
//     cat_values_buf, which is then returned.
// The old version wrote the pointer to *cat_values and always returned 0.
-int CvDTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf, const int** cat_values )
+const int* CvDTreeTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* cat_values_buf)
{
+ const int* cat_values = 0;
if( !is_buf_16u )
- *cat_values = buf->data.i + n->buf_idx*buf->cols +
- vi*sample_count + n->offset;
+ cat_values = buf->data.i + n->buf_idx*buf->cols +
+ vi*sample_count + n->offset;
else {
const unsigned short* short_values = (const unsigned short*)(buf->data.s + n->buf_idx*buf->cols +
vi*sample_count + n->offset);
// widen each 16-bit entry to int in the caller's buffer
for( int i = 0; i < n->sample_count; i++ )
cat_values_buf[i] = short_values[i];
- *cat_values = cat_values_buf;
+ cat_values = cat_values_buf;
}
-
- return 0; //TODO: return the number of non-missing values
+ return cat_values;
}
__END__;
}
// Removed by this change: the six per-thread scratch-buffer accessors below.
// Each one returned the start of row i of its member buffer, where i is
// omp_get_thread_num() when _OPENMP is defined and 0 otherwise.
-float* CvDTreeTrainData::get_pred_float_buf()
-{
- int i = 0;
-#ifdef _OPENMP
- i = omp_get_thread_num();
-#endif
- return &pred_float_buf[i][0];
-}
-int* CvDTreeTrainData::get_pred_int_buf()
-{
- int i = 0;
-#ifdef _OPENMP
- i = omp_get_thread_num();
-#endif
- return &pred_int_buf[i][0];
-}
-float* CvDTreeTrainData::get_resp_float_buf()
-{
- int i = 0;
-#ifdef _OPENMP
- i = omp_get_thread_num();
-#endif
- return &resp_float_buf[i][0];
-}
-int* CvDTreeTrainData::get_resp_int_buf()
-{
- int i = 0;
-#ifdef _OPENMP
- i = omp_get_thread_num();
-#endif
- return &resp_int_buf[i][0];
-}
-int* CvDTreeTrainData::get_cv_lables_buf()
-{
- int i = 0;
-#ifdef _OPENMP
- i = omp_get_thread_num();
-#endif
- return &cv_lables_buf[i][0];
-}
-int* CvDTreeTrainData::get_sample_idx_buf()
-{
- int i = 0;
-#ifdef _OPENMP
- i = omp_get_thread_num();
-#endif
- return &sample_idx_buf[i][0];
-}
-
/////////////////////// Decision Tree /////////////////////////
CvDTree::CvDTree()
if( data->get_var_type(vi) >= 0 ) // split on categorical var
{
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
// Scratch layout: [0, n) category labels; [n, 2n) class labels.
// The second half is only used by the priors branch below
// (responses_buf = labels_buf + n), so 2*n ints are needed exactly when
// have_priors is set. The size selector was previously inverted.
+ cv::AutoBuffer<int> inn_buf(n*(data->have_priors ? 2 : 1));
+ int* labels_buf = (int*)inn_buf;
+ const int* labels = data->get_cat_var_data( node, vi, labels_buf );
const int* subset = node->split->subset;
- data->get_cat_var_data( node, vi, labels_buf, &labels );
if( !data->have_priors )
{
int sum = 0, sum_abs = 0;
{
const double* priors = data->priors_mult->data.db;
double sum = 0, sum_abs = 0;
- int *responses_buf = data->get_resp_int_buf();
- const int* responses;
- data->get_class_labels(node, responses_buf, &responses);
// class labels live in the second half of inn_buf
+ int* responses_buf = labels_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
for( i = 0; i < n; i++ )
{
{
int split_point = node->split->ord.split_point;
int n1 = node->get_num_valid(vi);
-
- float* val_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf(n*(sizeof(int)*(data->have_priors ? 3 : 2) + sizeof(float)));
+ float* val_buf = (float*)(uchar*)inn_buf;
+ int* sorted_buf = (int*)(val_buf + n);
+ int* sample_idx_buf = sorted_buf + n;
const float* val = 0;
- int* sorted_buf = data->get_pred_int_buf();
const int* sorted = 0;
- data->get_ord_var_data( node, vi, val_buf, sorted_buf, &val, &sorted);
+ data->get_ord_var_data( node, vi, val_buf, sorted_buf, &val, &sorted, sample_idx_buf);
assert( 0 <= split_point && split_point < n1-1 );
else
{
const double* priors = data->priors_mult->data.db;
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
+ int* responses_buf = sample_idx_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
L = R = 0;
for( i = 0; i <= split_point; i++ )
return node->split->quality/(L + R);
}
// This hunk removes the OpenMP body of CvDTree::find_best_split (the '-'
// lines) and adds cv::DTreeBestSplitFinder: a functor with
// operator()(const BlockedRange&) and join() that searches a range of
// variables for the best split of one node.
-CvDTreeSplit* CvDTree::find_best_split( CvDTreeNode* node )
+
+namespace cv
{
- int vi;
- CvDTreeSplit *bestSplit = 0;
- int maxNumThreads = 1;
-#ifdef _OPENMP
- maxNumThreads = omp_get_num_procs();
-#endif
- vector<CvDTreeSplit*> splits(maxNumThreads);
- vector<CvDTreeSplit*> bestSplits(maxNumThreads);
- vector<int> canSplit(maxNumThreads);
- CvDTreeSplit **splitsPtr = &splits[0], ** bestSplitsPtr = &bestSplits[0];
- int* canSplitPtr = &canSplit[0];
- for (int i = 0; i < maxNumThreads; i++)
- {
- splitsPtr[i] = data->new_split_cat( 0, -1.0f );
- bestSplitsPtr[i] = data->new_split_cat( 0, -1.0f );
- canSplitPtr[i] = 0;
- }
-#ifdef _OPENMP
-#pragma omp parallel for num_threads(maxNumThreads) schedule(dynamic)
-#endif
- for( vi = 0; vi < data->var_count; vi++ )
// Primary constructor: allocates two raw blocks of splitSize bytes (the
// element size of the tree's split_heap). bestSplit starts zeroed with
// quality -1 and condensed_idx INT_MIN; split is zeroed scratch.
// NOTE(review): both blocks come from new char[] and no matching release is
// visible in this chunk -- confirm the declared type of bestSplit/split frees
// them with delete[] (or an equivalent).
+DTreeBestSplitFinder::DTreeBestSplitFinder( CvDTree* _tree, CvDTreeNode* _node)
+{
+ tree = _tree;
+ node = _node;
+ splitSize = tree->get_data()->split_heap->elem_size;
+
+ bestSplit = (CvDTreeSplit*)(new char[splitSize]);
+ memset((CvDTreeSplit*)bestSplit, 0, splitSize);
+ bestSplit->quality = -1;
+ bestSplit->condensed_idx = INT_MIN;
+ split = (CvDTreeSplit*)(new char[splitSize]);
+ memset((CvDTreeSplit*)split, 0, splitSize);
+ //haveSplit = false;
+}
+
// Splitting constructor: same tree and node as finder, a byte copy of
// finder's current bestSplit, and a fresh zeroed scratch split.
+DTreeBestSplitFinder::DTreeBestSplitFinder( const DTreeBestSplitFinder& finder, Split )
+{
+ tree = finder.tree;
+ node = finder.node;
+ splitSize = tree->get_data()->split_heap->elem_size;
+
+ bestSplit = (CvDTreeSplit*)(new char[splitSize]);
+ memcpy((CvDTreeSplit*)(bestSplit), (const CvDTreeSplit*)finder.bestSplit, splitSize);
+ split = (CvDTreeSplit*)(new char[splitSize]);
+ memset((CvDTreeSplit*)split, 0, splitSize);
+}
+
// Evaluates variables [range.begin(), range.end()) and keeps the
// highest-quality split in bestSplit. One scratch buffer of
// 2*n*(sizeof(int)+sizeof(float)) bytes is shared by all find_split_* calls.
+void DTreeBestSplitFinder::operator()(const BlockedRange& range)
+{
+ int vi, vi1 = range.begin(), vi2 = range.end();
+ int n = node->sample_count;
+ CvDTreeTrainData* data = tree->get_data();
+ AutoBuffer<uchar> inn_buf(2*n*(sizeof(int) + sizeof(float)));
+
+ for( vi = vi1; vi < vi2; vi++ )
{
- CvDTreeSplit *res, *t;
- int threadIdx = 0;
-#ifdef _OPENMP
- threadIdx = omp_get_thread_num();
-#endif
+ CvDTreeSplit *res;
int ci = data->get_var_type(vi);
// skip variables with at most one valid value in this node
if( node->get_num_valid(vi) <= 1 )
continue;
// dispatch on problem type and on categorical (ci >= 0) vs ordered variable
if( data->is_classifier )
{
if( ci >= 0 )
- res = find_split_cat_class( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );
+ res = tree->find_split_cat_class( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
else
- res = find_split_ord_class( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );
+ res = tree->find_split_ord_class( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
}
else
{
if( ci >= 0 )
- res = find_split_cat_reg( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );
+ res = tree->find_split_cat_reg( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
else
- res = find_split_ord_reg( node, vi, bestSplitsPtr[threadIdx]->quality, splitsPtr[threadIdx] );
+ res = tree->find_split_ord_reg( node, vi, bestSplit->quality, split, (uchar*)inn_buf );
}
- if( res )
- {
- canSplitPtr[threadIdx] = 1;
- if( bestSplitsPtr[threadIdx]->quality < splitsPtr[threadIdx]->quality )
- CV_SWAP( bestSplitsPtr[threadIdx], splitsPtr[threadIdx], t );
- }
- }
- int ti = 0;
- for( ; ti < maxNumThreads; ti++ )
- {
- if( canSplitPtr[ti] )
- {
- bestSplit = bestSplitsPtr[ti];
- break;
- }
- }
- for( ; ti < maxNumThreads; ti++ )
- {
- if( bestSplit->quality < bestSplitsPtr[ti]->quality )
- bestSplit = bestSplitsPtr[ti];
- }
- for(int i = 0; i < maxNumThreads; i++)
- {
- cvSetRemoveByPtr( data->split_heap, splitsPtr[i] );
- if( bestSplitsPtr[i] != bestSplit )
- cvSetRemoveByPtr( data->split_heap, bestSplitsPtr[i] );
// a candidate replaces bestSplit only when it is strictly better
+ if( res && bestSplit->quality < split->quality )
+ memcpy( (CvDTreeSplit*)bestSplit, (CvDTreeSplit*)split, splitSize );
}
+}
+
// Merges another finder's result: adopts rhs.bestSplit when it is strictly
// better than ours.
+void DTreeBestSplitFinder::join( DTreeBestSplitFinder& rhs )
+{
+ if( bestSplit->quality < rhs.bestSplit->quality )
+ memcpy( (CvDTreeSplit*)bestSplit, (CvDTreeSplit*)rhs.bestSplit, splitSize );
+}
+}
+
+
// Finds the best split of node over all data->var_count variables by running
// a DTreeBestSplitFinder through cv::parallel_reduce.
// Returns a split allocated from the tree's split heap, or 0 when no variable
// produced a split (the removed OpenMP implementation returned 0 in that
// case too).
+CvDTreeSplit* CvDTree::find_best_split( CvDTreeNode* node )
+{
+ DTreeBestSplitFinder finder( this, node );
+
+ cv::parallel_reduce(cv::BlockedRange(0, data->var_count), finder);
+
// The finder starts with quality == -1 and only overwrites it with a
// strictly better candidate, so a quality still at -1 means "nothing found".
// Do not allocate or return a split in that case.
+ CvDTreeSplit *bestSplit = 0;
+ if( finder.bestSplit->quality > -1 )
+ {
+ bestSplit = data->new_split_cat( 0, -1.0f );
+ memcpy( bestSplit, finder.bestSplit, finder.splitSize );
+ }
+
return bestSplit;
}
// Classification split search on an ordered variable (hunk excerpt; parts of
// the body are not shown here).
// New parameter _ext_buf: optional caller-provided scratch. When it is null,
// the per-sample arrays are placed in inn_buf after the base area.
CvDTreeSplit* CvDTree::find_split_ord_class( CvDTreeNode* node, int vi,
- float init_quality, CvDTreeSplit* _split )
+ float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
int n = node->sample_count;
int n1 = node->get_num_valid(vi);
int m = data->get_num_classes();
- float* values_buf = data->get_pred_float_buf();
// base area: lc[m] and rc[m] (2*m ints); always taken from inn_buf
+ int base_size = 2*m*sizeof(int);
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + n*(3*sizeof(int)+sizeof(float)));
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
// ext area, n elements each: values | sorted indices | sample indices | responses
+ float* values_buf = (float*)ext_buf;
+ int* sorted_indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = sorted_indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
- const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels( node, responses_buf, &responses );
+ const int* sorted_indices = 0;
+ data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values,
+ &sorted_indices, sample_indices_buf );
+ int* responses_buf = sample_indices_buf + n;
+ const int* responses = data->get_class_labels( node, responses_buf );
const int* rc0 = data->counts->data.i;
- int* lc = (int*)cvStackAlloc(m*sizeof(lc[0]));
- int* rc = (int*)cvStackAlloc(m*sizeof(rc[0]));
+ int* lc = (int*)base_buf;
+ int* rc = lc + m;
int i, best_i = -1;
double lsum2 = 0, rsum2 = 0, best_val = init_quality;
const double* priors = data->have_priors ? data->priors_mult->data.db : 0;
// compensate for missing values
for( i = n1; i < n; i++ )
{
- rc[responses[indices[i]]]--;
+ rc[responses[sorted_indices[i]]]--;
}
if( !priors )
for( i = 0; i < n1 - 1; i++ )
{
- int idx = responses[indices[i]];
+ int idx = responses[sorted_indices[i]];
int lv, rv;
L++; R--;
lv = lc[idx]; rv = rc[idx];
for( i = 0; i < n1 - 1; i++ )
{
- int idx = responses[indices[i]];
+ int idx = responses[sorted_indices[i]];
int lv, rv;
double p = priors[idx], p2 = p*p;
L += p; R -= p;
}
// Classification split search on a categorical variable (hunk excerpt; parts
// of the body are not shown here).
// New parameter _ext_buf: optional caller-provided scratch for the two
// per-sample int arrays (labels, responses). When it is null they are placed
// in inn_buf after the base area.
-CvDTreeSplit* CvDTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvDTree::find_split_cat_class( CvDTreeNode* node, int vi, float init_quality,
+ CvDTreeSplit* _split, uchar* _ext_buf )
{
int ci = data->get_var_type(vi);
int n = node->sample_count;
int m = data->get_num_classes();
int _mi = data->cat_count->data.i[ci], mi = _mi;
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
- data->get_cat_var_data(node, vi, labels_buf, &labels);
- int *responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
-
- int* lc = (int*)cvStackAlloc(m*sizeof(lc[0]));
- int* rc = (int*)cvStackAlloc(m*sizeof(rc[0]));
- int* _cjk = (int*)cvStackAlloc(m*(mi+1)*sizeof(_cjk[0]))+m, *cjk = _cjk;
- double* c_weights = (double*)cvStackAlloc( mi*sizeof(c_weights[0]) );
// base area: m*(3+mi) ints (lc, rc, one spare row of m, then cjk) plus
// (mi+1) doubles for c_weights; the extra double leaves room for alignPtr
+ int base_size = m*(3 + mi)*sizeof(int) + (mi+1)*sizeof(double);
// tail of the base area: clustered cjk + cluster_labels, or the int_ptr array
+ if( m > 2 && mi > data->params.max_categories )
+ base_size += (m*min(data->params.max_categories, n) + mi)*sizeof(int);
+ else
+ base_size += mi*sizeof(int*);
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + 2*n*sizeof(int));
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
+
+ int* lc = (int*)base_buf;
+ int* rc = lc + m;
// _cjk starts one row of m ints past the end of rc, mirroring the "+m" of
// the removed cvStackAlloc version
+ int* _cjk = rc + m*2, *cjk = _cjk;
+ double* c_weights = (double*)alignPtr(cjk + m*mi, sizeof(double));
+
+ int* labels_buf = (int*)ext_buf;
+ const int* labels = data->get_cat_var_data(node, vi, labels_buf);
+ int* responses_buf = labels_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
+
int* cluster_labels = 0;
int** int_ptr = 0;
int i, j, k, idx;
if( mi > data->params.max_categories )
{
mi = MIN(data->params.max_categories, n);
- cjk = (int*)cvStackAlloc( m*mi*sizeof(cjk[0]) );
- cluster_labels = (int*)cvStackAlloc( _mi*sizeof(cluster_labels[0]) );
// clustered table and its labels follow the _mi doubles of c_weights
+ cjk = (int*)(c_weights + _mi);
+ cluster_labels = cjk + m*mi;
cluster_categories( _cjk, _mi, m, cjk, mi, cluster_labels );
}
subset_i = 1;
else
{
assert( m == 2 );
- int_ptr = (int**)cvStackAlloc( mi*sizeof(int_ptr[0]) );
// pointer array follows the _mi doubles of c_weights
+ int_ptr = (int**)(c_weights + _mi);
for( j = 0; j < mi; j++ )
int_ptr[j] = cjk + j*2 + 1;
icvSortIntPtr( int_ptr, mi, 0 );
}
// Regression split search on an ordered variable (hunk excerpt; parts of the
// body are not shown here).
// New parameter _ext_buf: optional caller-provided scratch. When it is null,
// inn_buf is allocated with 2*n*(sizeof(int)+sizeof(float)) bytes.
-CvDTreeSplit* CvDTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvDTree::find_split_ord_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
int n = node->sample_count;
int n1 = node->get_num_valid(vi);
- float* values_buf = data->get_pred_float_buf();
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate(2*n*(sizeof(int) + sizeof(float)));
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
// scratch layout, n elements each:
// values (float) | sorted indices (int) | sample indices (int) | responses (float)
+ float* values_buf = (float*)ext_buf;
+ int* sorted_indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = sorted_indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
- const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_ord_responses( node, responses_buf, &responses );
+ const int* sorted_indices = 0;
+ data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
+ float* responses_buf = (float*)(sample_indices_buf + n);
// sample_indices_buf is passed again here as scratch for get_ord_responses
+ const float* responses = data->get_ord_responses( node, responses_buf, sample_indices_buf );
int i, best_i = -1;
double best_val = init_quality, lsum = 0, rsum = node->value*n;
// compensate for missing values
for( i = n1; i < n; i++ )
- rsum -= responses[indices[i]];
+ rsum -= responses[sorted_indices[i]];
// find the optimal split
for( i = 0; i < n1 - 1; i++ )
{
- float t = responses[indices[i]];
+ float t = responses[sorted_indices[i]];
L++; R--;
lsum += t;
rsum -= t;
return split;
}
// Regression split search on a categorical variable (hunk excerpt; parts of
// the body are not shown here).
// New parameter _ext_buf: optional caller-provided scratch for the per-sample
// arrays. When it is null they are placed in inn_buf after the base area.
-CvDTreeSplit* CvDTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split )
+CvDTreeSplit* CvDTree::find_split_cat_reg( CvDTreeNode* node, int vi, float init_quality, CvDTreeSplit* _split, uchar* _ext_buf )
{
int ci = data->get_var_type(vi);
int n = node->sample_count;
int mi = data->cat_count->data.i[ci];
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
- float* responses_buf = data->get_resp_float_buf();
- const float* responses = 0;
- data->get_cat_var_data(node, vi, labels_buf, &labels);
- data->get_ord_responses(node, responses_buf, &responses);
-
- double* sum = (double*)cvStackAlloc( (mi+1)*sizeof(sum[0]) ) + 1;
- int* counts = (int*)cvStackAlloc( (mi+1)*sizeof(counts[0]) ) + 1;
- double** sum_ptr = (double**)cvStackAlloc( (mi+1)*sizeof(sum_ptr[0]) );
+
// base area: (mi+1) doubles for sum plus one double of alignment slack,
// then (mi+1) ints for counts and (mi+1) pointers for sum_ptr
+ int base_size = (mi+2)*sizeof(double) + (mi+1)*(sizeof(int) + sizeof(double*));
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + n*(2*sizeof(int) + sizeof(float)));
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
// ext area, n elements each: labels (int) | responses (float) | sample indices (int)
+ int* labels_buf = (int*)ext_buf;
+ const int* labels = data->get_cat_var_data(node, vi, labels_buf);
+ float* responses_buf = (float*)(labels_buf + n);
+ int* sample_indices_buf = (int*)(responses_buf + n);
+ const float* responses = data->get_ord_responses(node, responses_buf, sample_indices_buf);
+
// sum and counts are offset by one element so that index -1 is addressable
+ double* sum = (double*)cv::alignPtr(base_buf,sizeof(double)) + 1;
+ int* counts = (int*)(sum + mi) + 1;
// NOTE(review): sum_ptr starts right after the counts ints, so it is only
// int-aligned -- confirm this is acceptable for a double** on the targets.
+ double** sum_ptr = (double**)(counts + mi);
int i, L = 0, R = 0;
double best_val = init_quality, lsum = 0, rsum = 0;
int best_subset = -1, subset_i;
return split;
}
// Surrogate split search on an ordered variable (hunk excerpt; parts of the
// body are not shown here).
// New parameter _ext_buf: optional caller-provided scratch. When it is null,
// inn_buf is allocated with room for values, sorted indices and sample
// indices, plus class labels when data->have_priors is set.
-CvDTreeSplit* CvDTree::find_surrogate_split_ord( CvDTreeNode* node, int vi )
+CvDTreeSplit* CvDTree::find_surrogate_split_ord( CvDTreeNode* node, int vi, uchar* _ext_buf )
{
const float epsilon = FLT_EPSILON*2;
const char* dir = (char*)data->direction->data.ptr;
- int n1 = node->get_num_valid(vi);
- float* values_buf = data->get_pred_float_buf();
+ int n = node->sample_count, n1 = node->get_num_valid(vi);
+ cv::AutoBuffer<uchar> inn_buf;
+ if( !_ext_buf )
+ inn_buf.allocate( n*(sizeof(int)*(data->have_priors ? 3 : 2) + sizeof(float)) );
+ uchar* ext_buf = _ext_buf ? _ext_buf : (uchar*)inn_buf;
// scratch layout, n elements each:
// values (float) | sorted indices (int) | sample indices (int) | [responses (int)]
+ float* values_buf = (float*)ext_buf;
+ int* sorted_indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = sorted_indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
- const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
+ const int* sorted_indices = 0;
+ data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
// LL - number of samples that both the primary and the surrogate splits send to the left
// LR - ... primary split sends to the left and the surrogate split sends to the right
// RL - ... primary split sends to the right and the surrogate split sends to the left
for( i = 0; i < n1; i++ )
{
- int d = dir[indices[i]];
+ int d = dir[sorted_indices[i]];
sum += d; sum_abs += d & 1;
}
// now iteratively compute LL, LR, RL and RR for every possible surrogate split value.
for( i = 0; i < n1 - 1; i++ )
{
- int d = dir[indices[i]];
+ int d = dir[sorted_indices[i]];
if( d < 0 )
{
double worst_val = node->maxlr;
double sum = 0, sum_abs = 0;
const double* priors = data->priors_mult->data.db;
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
// class labels use the fourth n-int slot of the scratch buffer
+ int* responses_buf = sample_indices_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
best_val = worst_val;
for( i = 0; i < n1; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = priors[responses[idx]];
int d = dir[idx];
sum += d*w; sum_abs += (d & 1)*w;
// now iteratively compute LL, LR, RL and RR for every possible surrogate split value.
for( i = 0; i < n1 - 1; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
double w = priors[responses[idx]];
int d = dir[idx];
}
// Surrogate split search on a categorical variable (hunk excerpt; parts of
// the body are not shown here).
// New parameter _ext_buf: optional caller-provided scratch for the per-sample
// labels (and class labels when data->have_priors is set). When it is null
// they are placed in inn_buf after the base area.
-CvDTreeSplit* CvDTree::find_surrogate_split_cat( CvDTreeNode* node, int vi )
+CvDTreeSplit* CvDTree::find_surrogate_split_cat( CvDTreeNode* node, int vi, uchar* _ext_buf )
{
const char* dir = (char*)data->direction->data.ptr;
int n = node->sample_count;
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
- data->get_cat_var_data(node, vi, labels_buf, &labels);
+ int i, mi = data->cat_count->data.i[data->get_var_type(vi)], l_win = 0;
+
// base area: 2*(mi+1) doubles for lc/rc plus one double of alignment slack;
// without priors, 2*(mi+1) ints are reserved in addition
+ int base_size = (2*(mi+1)+1)*sizeof(double) + (!data->have_priors ? 2*(mi+1)*sizeof(int) : 0);
+ cv::AutoBuffer<uchar> inn_buf(base_size);
+ if( !_ext_buf )
+ inn_buf.allocate(base_size + n*(sizeof(int) + (data->have_priors ? sizeof(int) : 0)));
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = _ext_buf ? _ext_buf : base_buf + base_size;
+
+ int* labels_buf = (int*)ext_buf;
+ const int* labels = data->get_cat_var_data(node, vi, labels_buf);
// LL - number of samples that both the primary and the surrogate splits send to the left
// LR - ... primary split sends to the left and the surrogate split sends to the right
// RL - ... primary split sends to the right and the surrogate split sends to the left
// RR - ... both send to the right
CvDTreeSplit* split = data->new_split_cat( vi, 0 );
- int i, mi = data->cat_count->data.i[data->get_var_type(vi)], l_win = 0;
double best_val = 0;
- double* lc = (double*)cvStackAlloc( (mi+1)*2*sizeof(lc[0]) ) + 1;
// lc and rc are offset by one element so that index -1 is addressable
+ double* lc = (double*)cv::alignPtr(base_buf,sizeof(double)) + 1;
double* rc = lc + mi + 1;
for( i = -1; i < mi; i++ )
// sent to the left (lc) and to the right (rc) by the primary split
if( !data->have_priors )
{
- int* _lc = (int*)cvStackAlloc((mi+2)*2*sizeof(_lc[0])) + 1;
// NOTE(review): (int*)rc + 1 points into the storage of rc[0], so _lc/_rc
// overlap the rc doubles, while base_size reserves separate int space after
// them. Confirm whether (int*)(rc + mi) + 1 was intended.
+ int* _lc = (int*)rc + 1;
int* _rc = _lc + mi + 1;
for( i = -1; i < mi; i++ )
else
{
const double* priors = data->priors_mult->data.db;
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
// class labels use the second n-int slot of the ext area
+ int* responses_buf = labels_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
for( i = 0; i < n; i++ )
{
// Computes the value of a node (hunk excerpt; parts of the body are not shown
// here). This change replaces the shared per-thread buffers and the
// cvStackAlloc calls with one local AutoBuffer split into a base area and a
// per-sample ext area.
void CvDTree::calc_node_value( CvDTreeNode* node )
{
int i, j, k, n = node->sample_count, cv_n = data->params.cv_folds;
- int* cv_labels_buf = data->get_cv_lables_buf();
- const int* cv_labels = 0;
- data->get_cv_labels(node, cv_labels_buf, &cv_labels);
+ int m = data->get_num_classes();
+
// base area: m*cv_n ints (classification), or cv_n doubles twice plus cv_n
// ints (regression)
+ int base_size = data->is_classifier ? m*cv_n*sizeof(int) : 2*cv_n*sizeof(double)+cv_n*sizeof(int);
// ext area: n cv labels, then n class labels (classification) or n float
// responses plus n sample indices (regression)
+ int ext_size = n*(sizeof(int) + (data->is_classifier ? sizeof(int) : sizeof(int)+sizeof(float)));
+ cv::AutoBuffer<uchar> inn_buf(base_size + ext_size);
+ uchar* base_buf = (uchar*)inn_buf;
+ uchar* ext_buf = base_buf + base_size;
+
+ int* cv_labels_buf = (int*)ext_buf;
+ const int* cv_labels = data->get_cv_labels(node, cv_labels_buf);
if( data->is_classifier )
{
// compute the number of instances of each class
int* cls_count = data->counts->data.i;
- int* responses_buf = data->get_resp_int_buf();
- const int* responses = 0;
- data->get_class_labels(node, responses_buf, &responses);
- int m = data->get_num_classes();
- int* cv_cls_count = (int*)cvStackAlloc(m*cv_n*sizeof(cv_cls_count[0]));
+ int* responses_buf = cv_labels_buf + n;
+ const int* responses = data->get_class_labels(node, responses_buf);
+ int* cv_cls_count = (int*)base_buf;
double max_val = -1, total_weight = 0;
int max_k = -1;
double* priors = data->priors_mult->data.db;
// over the samples with cv_labels(*)==j.
double sum = 0, sum2 = 0;
- float* values_buf = data->get_resp_float_buf();
- const float* values = 0;
- data->get_ord_responses(node, values_buf, &values);
+ float* values_buf = (float*)(cv_labels_buf + n);
+ int* sample_indices_buf = (int*)(values_buf + n);
+ const float* values = data->get_ord_responses(node, values_buf, sample_indices_buf);
double *cv_sum = 0, *cv_sum2 = 0;
int* cv_count = 0;
}
else
{
- cv_sum = (double*)cvStackAlloc( cv_n*sizeof(cv_sum[0]) );
- cv_sum2 = (double*)cvStackAlloc( cv_n*sizeof(cv_sum2[0]) );
- cv_count = (int*)cvStackAlloc( cv_n*sizeof(cv_count[0]) );
// NOTE(review): base_buf is a uchar buffer reinterpreted as double* with no
// alignPtr call -- confirm AutoBuffer storage is suitably aligned.
+ cv_sum = (double*)base_buf;
+ cv_sum2 = cv_sum + cv_n;
+ cv_count = (int*)(cv_sum2 + cv_n);
for( j = 0; j < cv_n; j++ )
{
// try to complete direction using surrogate splits
if( nz && data->params.use_surrogates )
{
+ cv::AutoBuffer<uchar> inn_buf(n*(2*sizeof(int)+sizeof(float)));
CvDTreeSplit* split = node->split->next;
for( ; split != 0 && nz; split = split->next )
{
if( data->get_var_type(vi) >= 0 ) // split on categorical var
{
- int* labels_buf = data->get_pred_int_buf();
- const int* labels = 0;
- data->get_cat_var_data(node, vi, labels_buf, &labels);
+ int* labels_buf = (int*)(uchar*)inn_buf;
+ const int* labels = data->get_cat_var_data(node, vi, labels_buf);
const int* subset = split->subset;
for( i = 0; i < n; i++ )
}
else // split on ordered var
{
- float* values_buf = data->get_pred_float_buf();
+ float* values_buf = (float*)(uchar*)inn_buf;
+ int* sorted_indices_buf = (int*)(values_buf + n);
+ int* sample_indices_buf = sorted_indices_buf + n;
const float* values = 0;
- int* indices_buf = data->get_pred_int_buf();
- const int* indices = 0;
- data->get_ord_var_data( node, vi, values_buf, indices_buf, &values, &indices );
+ const int* sorted_indices = 0;
+ data->get_ord_var_data( node, vi, values_buf, sorted_indices_buf, &values, &sorted_indices, sample_indices_buf );
int split_point = split->ord.split_point;
int n1 = node->get_num_valid(vi);
for( i = 0; i < n1; i++ )
{
- int idx = indices[i];
+ int idx = sorted_indices[i];
if( !dir[idx] )
{
int d = i <= split_point ? -1 : 1;
int new_buf_idx = data->get_child_buf_idx( node );
int work_var_count = data->get_work_var_count();
CvMat* buf = data->buf;
- cv::AutoBuffer<int, 1<<14> _temp_buf(n);
- int* temp_buf = _temp_buf;
+ cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int) + sizeof(float)));
+ int* temp_buf = (int*)(uchar*)inn_buf;
complete_node_dir(node);
nl += d^1;
}
-
bool split_input_data;
node->left = left = data->new_node( node, nl, new_buf_idx, node->offset );
node->right = right = data->new_node( node, nr, new_buf_idx, node->offset + nl );
for( vi = 0; vi < data->var_count; vi++ )
{
int ci = data->get_var_type(vi);
- int n1 = node->get_num_valid(vi);
- int *src_idx_buf = data->get_pred_int_buf();
- const int* src_idx = 0;
- float *src_val_buf = data->get_pred_float_buf();
- const float* src_val = 0;
-
+
if( ci >= 0 || !split_input_data )
continue;
- data->get_ord_var_data(node, vi, src_val_buf, src_idx_buf, &src_val, &src_idx);
+ int n1 = node->get_num_valid(vi);
+ float* src_val_buf = (float*)(uchar*)(temp_buf + n);
+ int* src_sorted_idx_buf = (int*)(src_val_buf + n);
+ int* src_sample_idx_buf = src_sorted_idx_buf + n;
+ const float* src_val = 0;
+ const int* src_sorted_idx = 0;
+ data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
for(i = 0; i < n; i++)
- temp_buf[i] = src_idx[i];
+ temp_buf[i] = src_sorted_idx[i];
if (data->is_buf_16u)
{
if( ci < 0 || (vi < data->var_count && !split_input_data) )
continue;
- int *src_lbls_buf = data->get_pred_int_buf();
- const int* src_lbls = 0;
- data->get_cat_var_data(node, vi, src_lbls_buf, &src_lbls);
+ int *src_lbls_buf = temp_buf + n;
+ const int* src_lbls = data->get_cat_var_data(node, vi, src_lbls_buf);
for(i = 0; i < n; i++)
temp_buf[i] = src_lbls[i];
// split sample indices
- int *sample_idx_src_buf = data->get_sample_idx_buf();
- const int* sample_idx_src = 0;
- data->get_sample_indices(node, sample_idx_src_buf, &sample_idx_src);
+ int *sample_idx_src_buf = temp_buf + n;
+ const int* sample_idx_src = data->get_sample_indices(node, sample_idx_src_buf);
for(i = 0; i < n; i++)
temp_buf[i] = sample_idx_src[i];