]> rtime.felk.cvut.cz Git - opencv.git/blob - opencv/doc/cvaux_object_detection.tex
documentation for the Calonder descriptor
[opencv.git] / opencv / doc / cvaux_object_detection.tex
1 \section{Object detection and descriptors}
2 \ifCpp
3
4 \cvclass{RandomizedTree}
5 The class contains the base structure for \texttt{RTreeClassifier}
6
7 \begin{lstlisting}
8 class CV_EXPORTS RandomizedTree
9 {  
10 public:
11         friend class RTreeClassifier;  
12
13         RandomizedTree();
14         ~RandomizedTree();
15
16         void train(std::vector<BaseKeypoint> const& base_set,
17                  cv::RNG &rng, int depth, int views,
18                  size_t reduced_num_dim, int num_quant_bits);
19         void train(std::vector<BaseKeypoint> const& base_set,
20                  cv::RNG &rng, PatchGenerator &make_patch, int depth,
21                  int views, size_t reduced_num_dim, int num_quant_bits);
22
23         // following two funcs are EXPERIMENTAL 
24         //(do not use unless you know exactly what you do)
25         static void quantizeVector(float *vec, int dim, int N, float bnds[2],
26                  int clamp_mode=0);
27         static void quantizeVector(float *src, int dim, int N, float bnds[2],
28                  uchar *dst);  
29
30         // patch_data must be a 32x32 array (no row padding)
31         float* getPosterior(uchar* patch_data);
32         const float* getPosterior(uchar* patch_data) const;
33         uchar* getPosterior2(uchar* patch_data);
34
35         void read(const char* file_name, int num_quant_bits);
36         void read(std::istream &is, int num_quant_bits);
37         void write(const char* file_name) const;
38         void write(std::ostream &os) const;
39
40         int classes() { return classes_; }
41         int depth() { return depth_; }
42
43         void discardFloatPosteriors() { freePosteriors(1); }
44
45         inline void applyQuantization(int num_quant_bits)
46                  { makePosteriors2(num_quant_bits); }
47
48 private:
49         int classes_;
50         int depth_;
51         int num_leaves_;  
52         std::vector<RTreeNode> nodes_;  
53         float **posteriors_;        // 16-bytes aligned posteriors
54         uchar **posteriors2_;     // 16-bytes aligned posteriors
55         std::vector<int> leaf_counts_;
56
57         void createNodes(int num_nodes, cv::RNG &rng);
58         void allocPosteriorsAligned(int num_leaves, int num_classes);
59         void freePosteriors(int which);   
60                  // which: 1=posteriors_, 2=posteriors2_, 3=both
61         void init(int classes, int depth, cv::RNG &rng);
62         void addExample(int class_id, uchar* patch_data);
63         void finalize(size_t reduced_num_dim, int num_quant_bits);  
64         int getIndex(uchar* patch_data) const;
65         inline float* getPosteriorByIndex(int index);
66         inline uchar* getPosteriorByIndex2(int index);
67         inline const float* getPosteriorByIndex(int index) const;
68         void convertPosteriorsToChar();
69         void makePosteriors2(int num_quant_bits);
70         void compressLeaves(size_t reduced_num_dim);  
71         void estimateQuantPercForPosteriors(float perc[2]);
72 };
73 \end{lstlisting}
74
75 \cvCppFunc{RandomizedTree::train}
76 Trains a randomized tree using an input set of keypoints
77
78 \cvdefCpp{
79 void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
80                         int depth, int views, size\_t reduced\_num\_dim,
81                         int num\_quant\_bits);  
82                         }
83 \cvdefCpp{
84 void train(std::vector<BaseKeypoint> const\& base\_set, cv::RNG \&rng,
85                         PatchGenerator \&make\_patch, int depth, int views, size\_t reduced\_num\_dim,
86                         int num\_quant\_bits);  
87                         }                               
88 \begin{description}
89 \cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
90 \cvarg{rng} {Random number generator used for training}
91 \cvarg{make\_patch} {Patch generator used for training}
92 \cvarg{depth} {Maximum tree depth}
93 %\cvarg{views} {}
94 \cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
95 \cvarg{num\_quant\_bits} {Number of bits used for quantization}
96 \end{description}               
97
98 \cvCppFunc {RandomizedTree::read}
99 Reads a pre-saved randomized tree from a file or stream
100 \cvdefCpp{void read(const char* file\_name, int num\_quant\_bits)}
101 \cvdefCpp{void read(std::istream \&is, int num\_quant\_bits)}
102 \begin{description}
103 \cvarg{file\_name}{Name of the file that contains the randomized tree data}
104 \cvarg{is}{Input stream associated with the file that contains the randomized tree data}
105 \cvarg{num\_quant\_bits} {Number of bits used for quantization}
106 \end{description}
107
108 \cvCppFunc {RandomizedTree::write}
109 Writes current randomized tree to a file or stream
110 \cvdefCpp{void write(const char* file\_name) const;}    
111 \cvdefCpp{void write(std::ostream \&os) const;} 
112 \begin{description}
113 \cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
114 \cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
115 \end{description}
116
117
118 \cvCppFunc {RandomizedTree::applyQuantization}
119 Applies quantization to the current randomized tree
120 \cvdefCpp{void applyQuantization(int num\_quant\_bits)}
121 \begin{description}
122 \cvarg{num\_quant\_bits} {Number of bits used for quantization}
123 \end{description}
124                 
125                 
126
127
128 \cvstruct{RTreeNode}
129 The structure describes a single node of \texttt{RandomizedTree}
130
131 \begin{lstlisting}
132 struct RTreeNode
133 {
134         short offset1, offset2;
135
136         RTreeNode() {}
137
138         RTreeNode(uchar x1, uchar y1, uchar x2, uchar y2)
139                 : offset1(y1*PATCH_SIZE + x1),
140                 offset2(y2*PATCH_SIZE + x2)
141         {}
142
143         //! Left child on 0, right child on 1
144         inline bool operator() (uchar* patch_data) const
145         {
146                 return patch_data[offset1] > patch_data[offset2];
147         }
148 };
149 \end{lstlisting}
150
151
152 \cvclass{RTreeClassifier}
153 The class \texttt{RTreeClassifier} represents the Calonder descriptor, which was originally introduced by Michael Calonder
154
155 \begin{lstlisting}
156 class CV_EXPORTS RTreeClassifier
157 {   
158 public:
159         static const int DEFAULT_TREES = 48;
160         static const size_t DEFAULT_NUM_QUANT_BITS = 4;  
161
162         RTreeClassifier();
163
164         void train(std::vector<BaseKeypoint> const& base_set, 
165                 cv::RNG &rng,
166                 int num_trees = RTreeClassifier::DEFAULT_TREES,
167                 int depth = DEFAULT_DEPTH,
168                 int views = DEFAULT_VIEWS,
169                 size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
170                 int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
171                          bool print_status = true);
172         void train(std::vector<BaseKeypoint> const& base_set,
173                 cv::RNG &rng, 
174                 PatchGenerator &make_patch,
175                 int num_trees = RTreeClassifier::DEFAULT_TREES,
176                 int depth = DEFAULT_DEPTH,
177                 int views = DEFAULT_VIEWS,
178                 size_t reduced_num_dim = DEFAULT_REDUCED_NUM_DIM,
179                 int num_quant_bits = DEFAULT_NUM_QUANT_BITS,
180                  bool print_status = true);
181
182         // sig must point to a memory block of at least 
183         //classes()*sizeof(float|uchar) bytes
184         void getSignature(IplImage *patch, uchar *sig);
185         void getSignature(IplImage *patch, float *sig);
186         void getSparseSignature(IplImage *patch, float *sig,
187                  float thresh);
188                  
189         static int countNonZeroElements(float *vec, int n, double tol=1e-10);
190         static inline void safeSignatureAlloc(uchar **sig, int num_sig=1,
191                         int sig_len=176);
192         static inline uchar* safeSignatureAlloc(int num_sig=1,
193                          int sig_len=176);  
194
195         inline int classes() { return classes_; }
196         inline int original_num_classes()
197                  { return original_num_classes_; }
198
199         void setQuantization(int num_quant_bits);
200         void discardFloatPosteriors();
201
202         void read(const char* file_name);
203         void read(std::istream &is);
204         void write(const char* file_name) const;
205         void write(std::ostream &os) const;
206
207         std::vector<RandomizedTree> trees_;
208
209 private:    
210         int classes_;
211         int num_quant_bits_;
212         uchar **posteriors_;
213         ushort *ptemp_;
214         int original_num_classes_;  
215         bool keep_floats_;
216 };
217 \end{lstlisting}
218
219 \cvCppFunc{RTreeClassifier::train}
220 Trains a randomized tree classifier using an input set of keypoints
221 \cvdefCpp{
222                 void train(std::vector<BaseKeypoint> const\& base\_set, 
223                         cv::RNG \&rng,
224                         int num\_trees = RTreeClassifier::DEFAULT\_TREES,
225                         int depth = DEFAULT\_DEPTH,
226                         int views = DEFAULT\_VIEWS,
227                         size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
228                         int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
229                         }
230 \cvdefCpp{
231                 void train(std::vector<BaseKeypoint> const\& base\_set,
232                         cv::RNG \&rng, 
233                         PatchGenerator \&make\_patch,
234                         int num\_trees = RTreeClassifier::DEFAULT\_TREES,
235                         int depth = DEFAULT\_DEPTH,
236                         int views = DEFAULT\_VIEWS,
237                         size\_t reduced\_num\_dim = DEFAULT\_REDUCED\_NUM\_DIM,
238                         int num\_quant\_bits = DEFAULT\_NUM\_QUANT\_BITS, bool print\_status = true);
239 }                       
240 \begin{description}
241 \cvarg{base\_set} {Vector of \texttt{BaseKeypoint} type. Contains the keypoints from the image that are used for training}
242 \cvarg{rng} {Random number generator used for training}
243 \cvarg{make\_patch} {Patch generator used for training}
244 \cvarg{num\_trees} {Number of randomized trees used in \texttt{RTreeClassifier}}
245 \cvarg{depth} {Maximum tree depth}
246 %\cvarg{views} {}
247 \cvarg{reduced\_num\_dim} {Number of dimensions used in the compressed signature}
248 \cvarg{num\_quant\_bits} {Number of bits used for quantization}
249 \cvarg{print\_status} {If true, the current status of training is printed on the console}
250 \end{description}               
251
252 \cvCppFunc{RTreeClassifier::getSignature}
253 Returns the signature for an image patch
254 \cvdefCpp{
255 void getSignature(IplImage *patch, uchar *sig)
256 }
257 \cvdefCpp{
258 void getSignature(IplImage *patch, float *sig)
259 }
260 \begin{description}
261 \cvarg{patch} {Image patch to calculate signature for}
262 \cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
263 \end{description}
264
265 \cvCppFunc{RTreeClassifier::getSparseSignature}
266 The function is similar to \texttt{getSignature} but uses a threshold to remove all signature elements that are less than the threshold, so that the signature is compressed
267 \cvdefCpp{
268         void getSparseSignature(IplImage *patch, float *sig,
269                  float thresh);
270 }
271 \begin{description}
272 \cvarg{patch} {Image patch to calculate signature for}
273 \cvarg{sig} {Output signature (array dimension is \texttt{reduced\_num\_dim})}
274 \cvarg{thresh} {The threshold that is used for compressing the signature}
275 \end{description}
276
277 \cvCppFunc{RTreeClassifier::countNonZeroElements}
278 The function returns the number of non-zero elements in the input array. 
279 \cvdefCpp{
280 static int countNonZeroElements(float *vec, int n, double tol=1e-10);
281 }
282 \begin{description}
283 \cvarg{vec}{Input vector containing float elements}
284 \cvarg{n}{Input vector size}
285 \cvarg{tol} {The threshold used for counting elements. All elements that are less than \texttt{tol} are considered zero elements}
286 \end{description}
287
288 \cvCppFunc {RTreeClassifier::read}
289 Reads a pre-saved \texttt{RTreeClassifier} from a file or stream
290 \cvdefCpp{void read(const char* file\_name)}
291 \cvdefCpp{void read(std::istream \&is)}
292 \begin{description}
293 \cvarg{file\_name}{Name of the file that contains the randomized tree data}
294 \cvarg{is}{Input stream associated with the file that contains the randomized tree data}
295 \end{description}
296
297 \cvCppFunc {RTreeClassifier::write}
298 Writes current RTreeClassifier to a file or stream
299 \cvdefCpp{void write(const char* file\_name) const;}    
300 \cvdefCpp{void write(std::ostream \&os) const;} 
301 \begin{description}
302 \cvarg{file\_name}{Name of the file where the randomized tree data will be stored}
303 \cvarg{os}{Output stream associated with the file where the randomized tree data will be stored}
304 \end{description}
305
306
307 \cvCppFunc {RTreeClassifier::setQuantization}
308 Applies quantization to the current randomized tree
309 \cvdefCpp{void setQuantization(int num\_quant\_bits)}
310 \begin{description}
311 \cvarg{num\_quant\_bits} {Number of bits used for quantization}
312 \end{description}               
313
314 The following example shows how to use \texttt{RTreeClassifier} for feature matching. Features are extracted from both the test image and the train image with SURF. The output is the \texttt{best\_corr} and \texttt{best\_corr\_idx} arrays, which hold, for every test feature, the best probability and the index of the corresponding train feature.
315 % ===== Example. Using RTreeClassifier for features matching =====
316 \begin{lstlisting}
317 CvMemStorage* storage = cvCreateMemStorage(0);
318 CvSeq *objectKeypoints = 0, *objectDescriptors = 0;
319 CvSeq *imageKeypoints = 0, *imageDescriptors = 0;
320 CvSURFParams params = cvSURFParams(500, 1);
321 cvExtractSURF( test_image, 0, &imageKeypoints, &imageDescriptors,
322                  storage, params );
323 cvExtractSURF( train_image, 0, &objectKeypoints, &objectDescriptors,
324                  storage, params );
325
326 cv::RTreeClassifier detector;
327 int patch_width = cv::PATCH_SIZE;
328 int patch_height = cv::PATCH_SIZE;
329 vector<cv::BaseKeypoint> base_set;
330 int i=0;
331 CvSURFPoint* point;
332 for (i=0;i<(n_points > 0 ? n_points : objectKeypoints->total);i++)
333 {
334         point=(CvSURFPoint*)cvGetSeqElem(objectKeypoints,i);
335         base_set.push_back(
336                 cv::BaseKeypoint(point->pt.x,point->pt.y,train_image));
337 }
338
339         //Detector training
340  cv::RNG rng( cvGetTickCount() );
341 cv::PatchGenerator gen(0,255,2,false,0.7,1.3,-CV_PI/3,CV_PI/3,
342                         -CV_PI/3,CV_PI/3);
343
344 printf("RTree Classifier training...\n");
345 detector.train(base_set,rng,gen,24,cv::DEFAULT_DEPTH,2000,
346         (int)base_set.size(), detector.DEFAULT_NUM_QUANT_BITS);
347 printf("Done\n");
348
349 float* signature = new float[detector.original_num_classes()];
350 float* best_corr;
351 int* best_corr_idx;
352 if (imageKeypoints->total > 0)
353 {
354         best_corr = new float[imageKeypoints->total];
355         best_corr_idx = new int[imageKeypoints->total];
356 }
357
358 for(i=0; i < imageKeypoints->total; i++)
359 {
360         point=(CvSURFPoint*)cvGetSeqElem(imageKeypoints,i);
361         int part_idx = -1;
362         float prob = 0.0f;
363
364         CvRect roi = cvRect((int)(point->pt.x) - patch_width/2,
365                 (int)(point->pt.y) - patch_height/2,
366                  patch_width, patch_height);
367         cvSetImageROI(test_image, roi);
368         roi = cvGetImageROI(test_image);
369         if(roi.width != patch_width || roi.height != patch_height)
370         {
371                 best_corr_idx[i] = part_idx;
372                 best_corr[i] = prob;
373         }
374         else
375         {
376                 cvSetImageROI(test_image, roi);
377                 IplImage* roi_image =
378                          cvCreateImage(cvSize(roi.width, roi.height),
379                          test_image->depth, test_image->nChannels);
380                 cvCopy(test_image,roi_image);
381
382                 detector.getSignature(roi_image, signature);
383                 for (int j = 0; j< detector.original_num_classes();j++)
384                 {
385                         if (prob < signature[j])
386                         {
387                                 part_idx = j;
388                                 prob = signature[j];
389                         }
390                 }
391
392                 best_corr_idx[i] = part_idx;
393                 best_corr[i] = prob;
394
395                         
396                 if (roi_image)
397                         cvReleaseImage(&roi_image);
398         }
399         cvResetImageROI(test_image);
400 }
401         
402 \end{lstlisting}
403
404 \fi