documentation for the Calonder descriptor

author vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>

Mon, 5 Apr 2010 15:33:21 +0000 (15:33 +0000)

committer vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>

Mon, 5 Apr 2010 15:33:21 +0000 (15:33 +0000)
author vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
Mon, 5 Apr 2010 15:33:21 +0000 (15:33 +0000)
committer vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
Mon, 5 Apr 2010 15:33:21 +0000 (15:33 +0000)
diff --git a/opencv/doc/cvaux_object_detection.tex b/opencv/doc/cvaux_object_detection.tex

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..f673629d4bd5c0d1612b876c6b66eaef1f72bbd5 100644 (file)
--- a/opencv/doc/cvaux_object_detection.tex
+++ b/opencv/doc/cvaux_object_detection.tex
@@ -0,0 +1,404 @@
+\section{Object detection and descriptors}
+\ifCpp
+
+\cvclass{RandomizedTree}
+The class contains the base structure for \texttt{RTreeClassifier}.
+
+\begin{lstlisting}
+class CV_EXPORTS RandomizedTree
+{  
+public:
+       friend class RTreeClassifier;  
+
+       RandomizedTree();
+       ~RandomizedTree();
+
+       void train(std::vector<BaseKeypoint> const& base_set,
+                cv::RNG &rng, int depth, int views,
+                size_t reduced_num_dim, int num_quant_bits);
+       void train(std::vector<BaseKeypoint> const& base_set,
+                cv::RNG &rng, PatchGenerator &make_patch, int depth,
+                int views, size_t reduced_num_dim, int num_quant_bits);
+
+       // following two funcs are EXPERIMENTAL 
+       //(do not use unless you know exactly what you do)
+       static void quantizeVector(float *vec, int dim, int N, float bnds[2],
+                int clamp_mode=0);
+       static void quantizeVector(float *src, int dim, int N, float bnds[2],
+                uchar *dst);  
+
+       // patch_data must be a 32x32 array (no row padding)
+       float* getPosterior(uchar* patch_data);
+       const float* getPosterior(uchar* patch_data) const;
+       uchar* getPosterior2(uchar* patch_data);
+
+       void read(const char* file_name, int num_quant_bits);
+       void read(std::istream &is, int num_quant_bits);
+       void write(const char* file_name) const;
+       void write(std::ostream &os) const;
+
+       int classes() { return classes_; }
+       int depth() { return depth_; }
+
+       void discardFloatPosteriors() { freePosteriors(1); }
+
+       inline void applyQuantization(int num_quant_bits)
+                { makePosteriors2(num_quant_bits); }
+
+private:
+       int classes_;
+       int depth_;
+       int num_leaves_;  
+       std::vector<RTreeNode> nodes_;  
+       float **posteriors_;        // 16-bytes aligned posteriors
+       uchar **posteriors2_;     // 16-bytes aligned posteriors
+       std::vector<int> leaf_counts_;
+
+       void createNodes(int num_nodes, cv::RNG &rng);
+       void allocPosteriorsAligned(int num_leaves, int num_classes);
+       void freePosteriors(int which);   
+                // which: 1=posteriors_, 2=posteriors2_, 3=both
+       void init(int classes, int depth, cv::RNG &rng);
+       void addExample(int class_id, uchar* patch_data);
+       void finalize(size_t reduced_num_dim, int num_quant_bits);  
+       int getIndex(uchar* patch_data) const;
+       inline float* getPosteriorByIndex(int index);
+       inline uchar* getPosteriorByIndex2(int index);
+       inline const float* getPosteriorByIndex(int index) const;
+       void convertPosteriorsToChar();
+       void makePosteriors2(int num_quant_bits);
+       void compressLeaves(size_t reduced_num_dim);  
+       void estimateQuantPercForPosteriors(float perc[2]);
+};
+\end{lstlisting}
+
+\cvCppFunc{RandomizedTree::train}
+Trains a randomized tree using an input set of keypoints
+
+\cvdefCpp{
+void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
+                       int depth, int views, size\_t reduced\_num\_dim,
+                       int num\_quant\_bits);  
+                       }
+\cvdefCpp{
+void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
+                       PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
+                       int num\_quant\_bits);  
+                       }                               
+\begin{description}
+\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
+\cvarg{rng} {Random number generator that is used for training}
+\cvarg{make\_patch} {Patch generator that is used for training}
+\cvarg{depth} {Maximum tree depth}
+%\cvarg{views} {}
+\cvarg{reduced\_num\_dim} {Number of dimensions that are used in the compressed signature}
+\cvarg{num\_quant\_bits} {Number of bits that are used for quantization}
+\end{description}              
+
+\cvCppFunc {RandomizedTree::read}
+Reads a pre-saved randomized tree from a file or stream
+\cvdefCpp{void read(const char* file\_name, int num\_quant\_bits);}
+\cvdefCpp{void read(std::istream \&is, int num\_quant\_bits);}
+\begin{description}
+\cvarg{file\_name}{Name of the file that contains the randomized tree data}
+\cvarg{is}{Input stream associated with the file that contains the randomized tree data}
+\cvarg{num\_quant\_bits} {Number of bits that are used for quantization}
+\end{description}
+
+\cvCppFunc {RandomizedTree::write}
+Writes current randomized tree to a file or stream
+\cvdefCpp{void write(const char* file\_name) const;}   
+\cvdefCpp{void write(std::ostream \&os) const;}        
+\begin{description}
+\cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
+\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
+\end{description}
+
+
+\cvCppFunc {RandomizedTree::applyQuantization}
+Applies quantization to the current randomized tree
+\cvdefCpp{void applyQuantization(int num\_quant\_bits)}
+\begin{description}
+\cvarg{num\_quant\_bits} {Number of bits that are used for quantization}
+\end{description}
+               
+               
+
+
+\cvstruct{RTreeNode}
+The structure contains the base node type for \texttt{RandomizedTree}.
+
+\begin{lstlisting}
+struct RTreeNode
+{
+       short offset1, offset2;
+
+       RTreeNode() {}
+
+       RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
+               : offset1(y1*PATCH_SIZE + x1),
+               offset2(y2*PATCH_SIZE + x2)
+       {}
+
+       //! Left child on 0, right child on 1
+       inline bool operator() (uchar* patch_data) const
+       {
+               return patch_data[offset1] > patch_data[offset2];
+       }
+};
+\end{lstlisting}
+
+
+\cvclass{RTreeClassifier}
+The class implements \texttt{RTreeClassifier}. It represents the Calonder descriptor, which was originally introduced by Michael Calonder.
+
+\begin{lstlisting}
+class CV_EXPORTS RTreeClassifier
+{   
+public:
+       static const int DEFAULT_TREES = 48;
+       static const size_t DEFAULT_NUM_QUANT_BITS = 4;  
+
+       RTreeClassifier();
+
+       void train(std::vector<BaseKeypoint> const& base_set, 
+               cv::RNG &rng,
+               int num_trees = RTreeClassifier::DEFAULT_TREES,
+               int depth = DEFAULT_DEPTH,
+               int views = DEFAULT_VIEWS,
+               size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
+               int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
+                        bool print_status = true);
+       void train(std::vector<BaseKeypoint> const& base_set,
+               cv::RNG &rng, 
+               PatchGenerator &make_patch,
+               int num_trees = RTreeClassifier::DEFAULT_TREES,
+               int depth = DEFAULT_DEPTH,
+               int views = DEFAULT_VIEWS,
+               size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
+               int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
+                bool print_status = true);
+
+       // sig must point to a memory block of at least 
+       //classes()*sizeof(float|uchar) bytes
+       void getSignature(IplImage *patch, uchar *sig);
+       void getSignature(IplImage *patch, float *sig);
+       void getSparseSignature(IplImage *patch, float *sig,
+                float thresh);
+                
+       static int countNonZeroElements(float *vec, int n, double tol=1e-10);
+       static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
+                       int sig_len=176);
+       static inline uchar* safeSignatureAlloc(int num_sig=1,
+                        int sig_len=176);  
+
+       inline int classes() { return classes_; }
+       inline int original_num_classes()
+                { return original_num_classes_; }
+
+       void setQuantization(int num_quant_bits);
+       void discardFloatPosteriors();
+
+       void read(const char* file_name);
+       void read(std::istream &is);
+       void write(const char* file_name) const;
+       void write(std::ostream &os) const;
+
+       std::vector<RandomizedTree> trees_;
+
+private:    
+       int classes_;
+       int num_quant_bits_;
+       uchar **posteriors_;
+       ushort *ptemp_;
+       int original_num_classes_;  
+       bool keep_floats_;
+};
+\end{lstlisting}
+
+\cvCppFunc{RTreeClassifier::train}
+Trains a randomized tree classifier using an input set of keypoints
+\cvdefCpp{
+               void train(std::vector<BaseKeypoint> const\& base\_set, 
+                       cv::RNG \&rng,
+                       int num\_trees = RTreeClassifier::DEFAULT\_TREES,
+                       int depth = DEFAULT\_DEPTH,
+                       int views = DEFAULT\_VIEWS,
+                       size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
+                       int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
+                       }
+\cvdefCpp{
+               void train(std::vector<BaseKeypoint> const\& base\_set,
+                       cv::RNG \&rng, 
+                       PatchGenerator \&make\_patch,
+                       int num\_trees = RTreeClassifier::DEFAULT\_TREES,
+                       int depth = DEFAULT\_DEPTH,
+                       int views = DEFAULT\_VIEWS,
+                       size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
+                       int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
+}                      
+\begin{description}
+\cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
+\cvarg{rng} {Random number generator that is used for training}
+\cvarg{make\_patch} {Patch generator that is used for training}
+\cvarg{num\_trees} {Number of randomized trees used in \texttt{RTreeClassifier}}
+\cvarg{depth} {Maximum tree depth}
+%\cvarg{views} {}
+\cvarg{reduced\_num\_dim} {Number of dimensions that are used in the compressed signature}
+\cvarg{num\_quant\_bits} {Number of bits that are used for quantization}
+\cvarg{print\_status} {Whether to print the current training status to the console}
+\end{description}              
+
+\cvCppFunc{RTreeClassifier::getSignature}
+Returns signature for image patch 
+\cvdefCpp{
+void getSignature(IplImage *patch, uchar *sig)
+}
+\cvdefCpp{
+void getSignature(IplImage *patch, float *sig)
+}
+\begin{description}
+\cvarg{patch} {Image patch to calculate signature for}
+\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
+\end{description}
+
+\cvCppFunc{RTreeClassifier::getSparseSignature}
+The function is similar to \texttt{getSignature}, but it uses the threshold to remove all signature elements that are less than the threshold, so that the signature is compressed
+\cvdefCpp{
+       void getSparseSignature(IplImage *patch, float *sig,
+                float thresh);
+}
+\begin{description}
+\cvarg{patch} {Image patch to calculate signature for}
+\cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
+\cvarg{thresh} {The threshold that is used for compressing the signature}
+\end{description}
+
+\cvCppFunc{RTreeClassifier::countNonZeroElements}
+The function returns the number of non-zero elements in the input array. 
+\cvdefCpp{
+static int countNonZeroElements(float *vec, int n, double tol=1e-10);
+}
+\begin{description}
+\cvarg{vec}{Input vector containing float elements}
+\cvarg{n}{Input vector size}
+\cvarg{tol} {The threshold used for counting elements. All elements that are less than \texttt{tol} are treated as zero elements}
+\end{description}
+
+\cvCppFunc {RTreeClassifier::read}
+Reads a pre-saved \texttt{RTreeClassifier} from a file or stream
+\cvdefCpp{void read(const char* file\_name);}
+\cvdefCpp{void read(std::istream \&is);}
+\begin{description}
+\cvarg{file\_name}{Name of the file that contains the randomized tree data}
+\cvarg{is}{Input stream associated with the file that contains the randomized tree data}
+\end{description}
+
+\cvCppFunc {RTreeClassifier::write}
+Writes current RTreeClassifier to a file or stream
+\cvdefCpp{void write(const char* file\_name) const;}   
+\cvdefCpp{void write(std::ostream \&os) const;}        
+\begin{description}
+\cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
+\cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
+\end{description}
+
+
+\cvCppFunc {RTreeClassifier::setQuantization}
+Applies quantization to the current randomized tree
+\cvdefCpp{void setQuantization(int num\_quant\_bits)}
+\begin{description}
+\cvarg{num\_quant\_bits} {Number of bits that are used for quantization}
+\end{description}              
+
+Below is an example of using \texttt{RTreeClassifier} for feature matching. There are test and train images, and we extract features from both with SURF. The output is the $best\_corr$ and $best\_corr\_idx$ arrays, which keep, for every feature of the test image, the best probability and the index of the corresponding train feature.
+% ===== Example. Using RTreeClassifier for features matching =====
+\begin{lstlisting}
+CvMemStorage* storage = cvCreateMemStorage(0);
+CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
+CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
+CvSURFParams params = cvSURFParams(500, 1);
+cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
+                storage, params );
+cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
+                storage, params );
+
+cv::RTreeClassifier detector;
+int patch_width = cv::PATCH_SIZE;
+int patch_height = cv::PATCH_SIZE;
+vector<cv::BaseKeypoint> base_set;
+int i=0;
+CvSURFPoint* point;
+for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
+{
+       point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
+       base_set.push_back(
+               cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
+}
+
+       //Detector training
+ cv::RNG rng( cvGetTickCount() );
+cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
+                       -CV_PI/3,CV_PI/3);
+
+printf("RTree Classifier training...\n");
+detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
+       (int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
+printf("Done\n");
+
+float* signature = new float[detector.original_num_classes()];
+float* best_corr;
+int* best_corr_idx;
+if (imageKeypoints->total > 0)
+{
+       best_corr = new float[imageKeypoints->total];
+       best_corr_idx = new int[imageKeypoints->total];
+}
+
+for(i=0; i < imageKeypoints->total; i++)
+{
+       point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
+       int part_idx = -1;
+       float prob = 0.0f;
+
+       CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
+               (int)(point->pt.y) - patch_height/2,
+                patch_width, patch_height);
+       cvSetImageROI(test_image, roi);
+       roi = cvGetImageROI(test_image);
+       if(roi.width != patch_width || roi.height != patch_height)
+       {
+               best_corr_idx[i] = part_idx;
+               best_corr[i] = prob;
+       }
+       else
+       {
+               cvSetImageROI(test_image, roi);
+               IplImage* roi_image =
+                        cvCreateImage(cvSize(roi.width, roi.height),
+                        test_image->depth, test_image->nChannels);
+               cvCopy(test_image,roi_image);
+
+               detector.getSignature(roi_image, signature);
+               for (int j = 0; j< detector.original_num_classes();j++)
+               {
+                       if (prob < signature[j])
+                       {
+                               part_idx = j;
+                               prob = signature[j];
+                       }
+               }
+
+               best_corr_idx[i] = part_idx;
+               best_corr[i] = prob;
+
+                       
+               if (roi_image)
+                       cvReleaseImage(&roi_image);
+       }
+       cvResetImageROI(test_image);
+}
+       
+\end{lstlisting}
+
+\fi
+\ No newline at end of file
author	vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
	Mon, 5 Apr 2010 15:33:21 +0000 (15:33 +0000)
committer	vp153 <vp153@73c94f0f-984f-4a5f-82bc-2d8db8d8ee08>
	Mon, 5 Apr 2010 15:33:21 +0000 (15:33 +0000)