opencv/src/cv/cvsurf.cpp

   1 /* Original code has been submitted by Liu Liu. Here is the copyright.
   2 ----------------------------------------------------------------------------------
   3  * An OpenCV Implementation of SURF
   4  * Further Information Refer to "SURF: Speed-Up Robust Feature"
   5  * Author: Liu Liu
   6  * liuliu.1987+opencv@gmail.com
   7  *
   8  * There are still serveral lacks for this experimental implementation:
   9  * 1.The interpolation of sub-pixel mentioned in article was not implemented yet;
  10  * 2.A comparision with original libSurf.so shows that the hessian detector is not a 100% match to their implementation;
  11  * 3.Due to above reasons, I recommanded the original one for study and reuse;
  12  *
  13  * However, the speed of this implementation is something comparable to original one.
  14  *
  15  * Copyright© 2008, Liu Liu All rights reserved.
  16  *
  17  * Redistribution and use in source and binary forms, with or
  18  * without modification, are permitted provided that the following
  19  * conditions are met:
  20  *      Redistributions of source code must retain the above
  21  *      copyright notice, this list of conditions and the following
  22  *      disclaimer.
  23  *      Redistributions in binary form must reproduce the above
  24  *      copyright notice, this list of conditions and the following
  25  *      disclaimer in the documentation and/or other materials
  26  *      provided with the distribution.
  27  *      The name of Contributor may not be used to endorse or
  28  *      promote products derived from this software without
  29  *      specific prior written permission.
  30  *
  31  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  32  * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  33  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  34  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  35  * DISCLAIMED. IN NO EVENT SHALL THE CONTRIBUTORS BE LIABLE FOR ANY
  36  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  37  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  38  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
  39  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  40  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  41  * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  42  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
  43  * OF SUCH DAMAGE.
  44  */
  45
  46 /*
  47    The following changes have been made, comparing to the original contribution:
  48    1. A lot of small optimizations, less memory allocations, got rid of global buffers
  49    2. Reversed order of cvGetQuadrangleSubPix and cvResize calls; probably less accurate, but much faster
  50    3. The descriptor computing part (which is most expensive) is threaded using OpenMP
  51    (subpixel-accurate keypoint localization and scale estimation are still TBD)
  52 */
  53
  54 /*
  55 Keypoint position and scale interpolation has been implemented as described in
  56 the Brown and Lowe paper cited by the SURF paper.
  57
  58 The sampling step along the x and y axes of the image for the determinant of the
  59 Hessian is now the same for each layer in an octave. While this increases the
  60 computation time, it ensures that a true 3x3x3 neighbourhood exists, with
  61 samples calculated at the same position in the layers above and below. This
  62 results in improved maxima detection and non-maxima suppression, and I think it
  63 is consistent with the description in the SURF paper.
  64
  65 The wavelet size sampling interval has also been made consistent. The wavelet
  66 size at the first layer of the first octave is now 9 instead of 7. Along with
  67 regular position sampling steps, this makes location and scale interpolation
  68 easy. I think this is consistent with the SURF paper and original
  69 implementation.
  70
  71 The scaling of the wavelet parameters has been fixed to ensure that the patterns
  72 are symmetric around the centre. Previously the truncation caused by integer
  73 division in the scaling ratio caused a bias towards the top left of the wavelet,
  74 resulting in inconsistent keypoint positions.
  75
  76 The matrices for the determinant and trace of the Hessian are now reused in each
  77 octave.
  78
  79 The extraction of the patch of pixels surrounding a keypoint used to build a
  80 descriptor has been simplified.
  81
  82 Keypoint descriptor normalisation has been changed from normalising each 4x4
  83 cell (resulting in a descriptor of magnitude 16) to normalising the entire
  84 descriptor to magnitude 1.
  85
  86 The default number of octaves has been increased from 3 to 4 to match the
  87 original SURF binary default. The increase in computation time is minimal since
  88 the higher octaves are sampled sparsely.
  89
  90 The default number of layers per octave has been reduced from 3 to 2, to prevent
  91 redundant calculation of similar sizes in consecutive octaves.  This decreases
  92 computation time. The number of features extracted may be less, however the
  93 additional features were mostly redundant.
  94
  95 The radius of the circle of gradient samples used to assign an orientation has
  96 been increased from 4 to 6 to match the description in the SURF paper. This is
  97 now defined by ORI_RADIUS, and could be made into a parameter.
  98
  99 The size of the sliding window used in orientation assignment has been reduced
 100 from 120 to 60 degrees to match the description in the SURF paper. This is now
 101 defined by ORI_WIN, and could be made into a parameter.
 102
 103 Other options like  HAAR_SIZE0, HAAR_SIZE_INC, SAMPLE_STEP0, ORI_SEARCH_INC,
 104 ORI_SIGMA and DESC_SIGMA have been separated from the code and documented.
 105 These could also be made into parameters.
 106
 107 Modifications by Ian Mahon
 108
 109 */
 110 #include "_cv.h"
 111
 112 CvSURFParams cvSURFParams(double threshold, int extended)
 113 {
 114     CvSURFParams params;
 115     params.hessianThreshold = threshold;
 116     params.extended = extended;
 117     params.nOctaves = 4;
 118     params.nOctaveLayers = 2;
 119     return params;
 120 }
 121
 122 struct CvSurfHF
 123 {
 124     int p0, p1, p2, p3;
 125     float w;
 126 };
 127
 128 CV_INLINE float
 129 icvCalcHaarPattern( const int* origin, const CvSurfHF* f, int n )
 130 {
 131     double d = 0;
 132     for( int k = 0; k < n; k++ )
 133         d += (origin[f[k].p0] + origin[f[k].p3] - origin[f[k].p1] - origin[f[k].p2])*f[k].w;
 134     return (float)d;
 135 }
 136
 137 static void
 138 icvResizeHaarPattern( const int src[][5], CvSurfHF* dst, int n, int oldSize, int newSize, int widthStep )
 139 {
 140     float ratio = (float)newSize/oldSize;
 141     for( int k = 0; k < n; k++ )
 142     {
 143         int dx1 = cvRound( ratio*src[k][0] );
 144         int dy1 = cvRound( ratio*src[k][1] );
 145         int dx2 = cvRound( ratio*src[k][2] );
 146         int dy2 = cvRound( ratio*src[k][3] );
 147         dst[k].p0 = dy1*widthStep + dx1;
 148         dst[k].p1 = dy2*widthStep + dx1;
 149         dst[k].p2 = dy1*widthStep + dx2;
 150         dst[k].p3 = dy2*widthStep + dx2;
 151         dst[k].w = src[k][4]/((float)(dx2-dx1)*(dy2-dy1));
 152     }
 153 }
 154
 155 /*
 156  * Maxima location interpolation as described in "Invariant Features from
 157  * Interest Point Groups" by Matthew Brown and David Lowe. This is performed by
 158  * fitting a 3D quadratic to a set of neighbouring samples.
 159  *
 160  * The gradient vector and Hessian matrix at the initial keypoint location are
 161  * approximated using central differences. The linear system Ax = b is then
 162  * solved, where A is the Hessian, b is the negative gradient, and x is the
 163  * offset of the interpolated maxima coordinates from the initial estimate.
 164  * This is equivalent to an iteration of Netwon's optimisation algorithm.
 165  *
 166  * N9 contains the samples in the 3x3x3 neighbourhood of the maxima
 167  * dx is the sampling step in x
 168  * dy is the sampling step in y
 169  * ds is the sampling step in size
 170  * point contains the keypoint coordinates and scale to be modified
 171  *
 172  * Return value is 1 if interpolation was successful, 0 on failure.
 173  */
 174 CV_INLINE int
 175 icvInterpolateKeypoint( float N9[3][9], int dx, int dy, int ds, CvSURFPoint *point )
 176 {
 177     int solve_ok;
 178     float A[9], x[3], b[3];
 179     CvMat _A = cvMat(3, 3, CV_32F, A);
 180     CvMat _x = cvMat(3, 1, CV_32F, x);
 181     CvMat _b = cvMat(3, 1, CV_32F, b);
 182
 183     b[0] = -(N9[1][5]-N9[1][3])/2;  /* Negative 1st deriv with respect to x */
 184     b[1] = -(N9[1][7]-N9[1][1])/2;  /* Negative 1st deriv with respect to y */
 185     b[2] = -(N9[2][4]-N9[0][4])/2;  /* Negative 1st deriv with respect to s */
 186
 187     A[0] = N9[1][3]-2*N9[1][4]+N9[1][5];            /* 2nd deriv x, x */
 188     A[1] = (N9[1][8]-N9[1][6]-N9[1][2]+N9[1][0])/4; /* 2nd deriv x, y */
 189     A[2] = (N9[2][5]-N9[2][3]-N9[0][5]+N9[0][3])/4; /* 2nd deriv x, s */
 190     A[3] = A[1];                                    /* 2nd deriv y, x */
 191     A[4] = N9[1][1]-2*N9[1][4]+N9[1][7];            /* 2nd deriv y, y */
 192     A[5] = (N9[2][7]-N9[2][1]-N9[0][7]+N9[0][1])/4; /* 2nd deriv y, s */
 193     A[6] = A[2];                                    /* 2nd deriv s, x */
 194     A[7] = A[5];                                    /* 2nd deriv s, y */
 195     A[8] = N9[0][4]-2*N9[1][4]+N9[2][4];            /* 2nd deriv s, s */
 196
 197     solve_ok = cvSolve( &_A, &_b, &_x );
 198     if( solve_ok )
 199     {
 200         point->pt.x += x[0]*dx;
 201         point->pt.y += x[1]*dy;
 202         point->size = cvRound( point->size + x[2]*ds );
 203     }
 204     return solve_ok;
 205 }
 206
 207
 208 static CvSeq* icvFastHessianDetector( const CvMat* sum, const CvMat* mask_sum,
 209     CvMemStorage* storage, const CvSURFParams* params )
 210 {
 211     CvSeq* points = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvSURFPoint), storage );
 212
 213     /* Wavelet size at first layer of first octave. */
 214     const int HAAR_SIZE0 = 9;
 215
 216     /* Wavelet size increment between layers. This should be an even number,
 217        such that the wavelet sizes in an octave are either all even or all odd.
 218        This ensures that when looking for the neighbours of a sample, the layers
 219        above and below are aligned correctly. */
 220     const int HAAR_SIZE_INC = 6;
 221
 222     /* Sampling step along image x and y axes at first octave. This is doubled
 223        for each additional octave. WARNING: Increasing this improves speed,
 224        however keypoint extraction becomes unreliable. */
 225     const int SAMPLE_STEP0 = 1;
 226
 227
 228     /* Wavelet Data */
 229     const int NX=3, NY=3, NXY=4, NM=1;
 230     const int dx_s[NX][5] = { {0, 2, 3, 7, 1}, {3, 2, 6, 7, -2}, {6, 2, 9, 7, 1} };
 231     const int dy_s[NY][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
 232     const int dxy_s[NXY][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
 233     const int dm[NM][5] = { {0, 0, 9, 9, 1} };
 234     CvSurfHF Dx[NX], Dy[NY], Dxy[NXY], Dm;
 235
 236     CvMat** dets = (CvMat**)cvStackAlloc((params->nOctaveLayers+2)*sizeof(dets[0]));
 237     CvMat** traces = (CvMat**)cvStackAlloc((params->nOctaveLayers+2)*sizeof(traces[0]));
 238     int *sizes = (int*)cvStackAlloc((params->nOctaveLayers+2)*sizeof(sizes[0]));
 239
 240     double dx = 0, dy = 0, dxy = 0;
 241     int octave, layer, sampleStep, size, margin;
 242     int rows, cols;
 243     int i, j, sum_i, sum_j;
 244     const int* s_ptr;
 245     float *det_ptr, *trace_ptr;
 246
 247     /* Allocate enough space for hessian determinant and trace matrices at the
 248        first octave. Clearing these initially or between octaves is not
 249        required, since all values that are accessed are first calculated */
 250     for( layer = 0; layer <= params->nOctaveLayers+1; layer++ )
 251     {
 252         dets[layer]   = cvCreateMat( (sum->rows-1)/SAMPLE_STEP0, (sum->cols-1)/SAMPLE_STEP0, CV_32FC1 );
 253         traces[layer] = cvCreateMat( (sum->rows-1)/SAMPLE_STEP0, (sum->cols-1)/SAMPLE_STEP0, CV_32FC1 );
 254     }
 255
 256     for( octave = 0, sampleStep=SAMPLE_STEP0; octave < params->nOctaves; octave++, sampleStep*=2 )
 257     {
 258         /* Hessian determinant and trace sample array size in this octave */
 259         rows = (sum->rows-1)/sampleStep;
 260         cols = (sum->cols-1)/sampleStep;
 261
 262         /* Calculate the determinant and trace of the hessian */
 263         for( layer = 0; layer <= params->nOctaveLayers+1; layer++ )
 264         {
 265             sizes[layer] = size = (HAAR_SIZE0+HAAR_SIZE_INC*layer)<<octave;
 266             icvResizeHaarPattern( dx_s, Dx, NX, 9, size, sum->cols );
 267             icvResizeHaarPattern( dy_s, Dy, NY, 9, size, sum->cols );
 268             icvResizeHaarPattern( dxy_s, Dxy, NXY, 9, size, sum->cols );
 269             /*printf( "octave=%d layer=%d size=%d rows=%d cols=%d\n", octave, layer, size, rows, cols );*/
 270
 271             margin = (size/2)/sampleStep;
 272             for( sum_i=0, i=margin; sum_i<=(sum->rows-1)-size; sum_i+=sampleStep, i++ )
 273             {
 274                 s_ptr = sum->data.i + sum_i*sum->cols;
 275                 det_ptr = dets[layer]->data.fl + i*dets[layer]->cols + margin;
 276                 trace_ptr = traces[layer]->data.fl + i*traces[layer]->cols + margin;
 277                 for( sum_j=0, j=margin; sum_j<=(sum->cols-1)-size; sum_j+=sampleStep, j++ )
 278                 {
 279                     dx  = icvCalcHaarPattern( s_ptr, Dx, 3 );
 280                     dy  = icvCalcHaarPattern( s_ptr, Dy, 3 );
 281                     dxy = icvCalcHaarPattern( s_ptr, Dxy, 4 );
 282                     s_ptr+=sampleStep;
 283                     *det_ptr++ = (float)(dx*dy - 0.81*dxy*dxy);
 284                     *trace_ptr++ = (float)(dx + dy);
 285                 }
 286             }
 287         }
 288
 289         /* Find maxima in the determinant of the hessian */
 290         for( layer = 1; layer <= params->nOctaveLayers; layer++ )
 291         {
 292             size = sizes[layer];
 293             icvResizeHaarPattern( dm, &Dm, NM, 9, size, mask_sum ? mask_sum->cols : sum->cols );
 294
 295             /* Ignore pixels without a 3x3 neighbourhood in the layer above */
 296             margin = (sizes[layer+1]/2)/sampleStep+1;
 297             for( i = margin; i < rows-margin; i++ )
 298             {
 299                 det_ptr = dets[layer]->data.fl + i*dets[layer]->cols;
 300                 trace_ptr = traces[layer]->data.fl + i*traces[layer]->cols;
 301                 for( j = margin; j < cols-margin; j++ )
 302                 {
 303                     float val0 = det_ptr[j];
 304                     if( val0 > params->hessianThreshold )
 305                     {
 306                         /* Coordinates for the start of the wavelet in the sum image. There
 307                            is some integer division involved, so don't try to simplify this
 308                            (cancel out sampleStep) without checking the result is the same */
 309                         int sum_i = sampleStep*(i-(size/2)/sampleStep);
 310                         int sum_j = sampleStep*(j-(size/2)/sampleStep);
 311
 312                         /* The 3x3x3 neighbouring samples around the maxima.
 313                            The maxima is included at N9[1][4] */
 314                         int c = dets[layer]->cols;
 315                         const float *det1 = dets[layer-1]->data.fl + i*c + j;
 316                         const float *det2 = dets[layer]->data.fl   + i*c + j;
 317                         const float *det3 = dets[layer+1]->data.fl + i*c + j;
 318                         float N9[3][9] = { { det1[-c-1], det1[-c], det1[-c+1],
 319                                              det1[-1]  , det1[0] , det1[1],
 320                                              det1[c-1] , det1[c] , det1[c+1]  },
 321                                            { det2[-c-1], det2[-c], det2[-c+1],
 322                                              det2[-1]  , det2[0] , det2[1],
 323                                              det2[c-1] , det2[c] , det2[c+1 ] },
 324                                            { det3[-c-1], det3[-c], det3[-c+1],
 325                                              det3[-1  ], det3[0] , det3[1],
 326                                              det3[c-1] , det3[c] , det3[c+1 ] } };
 327
 328                         /* Check the mask - why not just check the mask at the center of the wavelet? */
 329                         if( mask_sum )
 330                         {
 331                             const int* mask_ptr = mask_sum->data.i +  mask_sum->cols*sum_i + sum_j;
 332                             float mval = icvCalcHaarPattern( mask_ptr, &Dm, 1 );
 333                             if( mval < 0.5 )
 334                                 continue;
 335                         }
 336
 337                         /* Non-maxima suppression. val0 is at N9[1][4]*/
 338                         if( val0 > N9[0][0] && val0 > N9[0][1] && val0 > N9[0][2] &&
 339                             val0 > N9[0][3] && val0 > N9[0][4] && val0 > N9[0][5] &&
 340                             val0 > N9[0][6] && val0 > N9[0][7] && val0 > N9[0][8] &&
 341                             val0 > N9[1][0] && val0 > N9[1][1] && val0 > N9[1][2] &&
 342                             val0 > N9[1][3]                    && val0 > N9[1][5] &&
 343                             val0 > N9[1][6] && val0 > N9[1][7] && val0 > N9[1][8] &&
 344                             val0 > N9[2][0] && val0 > N9[2][1] && val0 > N9[2][2] &&
 345                             val0 > N9[2][3] && val0 > N9[2][4] && val0 > N9[2][5] &&
 346                             val0 > N9[2][6] && val0 > N9[2][7] && val0 > N9[2][8] )
 347                         {
 348                             /* Calculate the wavelet center coordinates for the maxima */
 349                             double center_i = sum_i + (double)(size-1)/2;
 350                             double center_j = sum_j + (double)(size-1)/2;
 351
 352                             CvSURFPoint point = cvSURFPoint( cvPoint2D32f(center_j,center_i),
 353                                                              CV_SIGN(trace_ptr[j]), sizes[layer], 0, val0 );
 354
 355                             /* Interpolate maxima location within the 3x3x3 neighbourhood  */
 356                             int ds = sizes[layer]-sizes[layer-1];
 357                             int interp_ok = icvInterpolateKeypoint( N9, sampleStep, sampleStep, ds, &point );
 358
 359                             /* Sometimes the interpolation step gives a negative size etc. */
 360                             if( interp_ok && point.size >= 1 &&
 361                                 point.pt.x >= 0 && point.pt.x <= (sum->cols-1) &&
 362                                 point.pt.y >= 0 && point.pt.y <= (sum->rows-1) )
 363                             {
 364                                 /*printf( "Keypoint %f %f %d\n", point.pt.x, point.pt.y, point.size );*/
 365                                 cvSeqPush( points, &point );
 366                             }
 367                         }
 368                     }
 369                 }
 370             }
 371         }
 372     }
 373
 374     /* Clean-up */
 375     for( layer = 0; layer <= params->nOctaveLayers+1; layer++ )
 376     {
 377         cvReleaseMat( &dets[layer] );
 378         cvReleaseMat( &traces[layer] );
 379     }
 380
 381     return points;
 382 }
 383
 384
 385 CV_IMPL void
 386 cvExtractSURF( const CvArr* _img, const CvArr* _mask,
 387                CvSeq** _keypoints, CvSeq** _descriptors,
 388                CvMemStorage* storage, CvSURFParams params )
 389 {
 390     CvMat *sum = 0, *mask1 = 0, *mask_sum = 0, **win_bufs = 0;
 391
 392     if( _keypoints )
 393         *_keypoints = 0;
 394     if( _descriptors )
 395         *_descriptors = 0;
 396
 397     /* Radius of the circle in which to sample gradients to assign an
 398        orientation */
 399     const int ORI_RADIUS = 6;
 400
 401     /* The size of the sliding window (in degrees) used to assign an
 402        orientation */
 403     const int ORI_WIN = 60;
 404
 405     /* Increment used for the orientation sliding window (in degrees) */
 406     const int ORI_SEARCH_INC = 5;
 407
 408     /* Standard deviation of the Gaussian used to weight the gradient samples
 409        used to assign an orientation */
 410     const float ORI_SIGMA = 2.5f;
 411
 412     /* Standard deviation of the Gaussian used to weight the gradient samples
 413        used to build a keypoint descriptor */
 414     const float DESC_SIGMA = 3.3f;
 415
 416
 417     /* X and Y gradient wavelet data */
 418     const int NX=2, NY=2;
 419     int dx_s[NX][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
 420     int dy_s[NY][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
 421
 422     CvSeq *keypoints, *descriptors = 0;
 423     CvMat imghdr, *img = cvGetMat(_img, &imghdr);
 424     CvMat maskhdr, *mask = _mask ? cvGetMat(_mask, &maskhdr) : 0;
 425
 426     const int max_ori_samples = (2*ORI_RADIUS+1)*(2*ORI_RADIUS+1);
 427     int descriptor_size = params.extended ? 128 : 64;
 428     const int descriptor_data_type = CV_32F;
 429     const int PATCH_SZ = 20;
 430     float DW[PATCH_SZ][PATCH_SZ];
 431     CvMat _DW = cvMat(PATCH_SZ, PATCH_SZ, CV_32F, DW);
 432     CvPoint apt[max_ori_samples];
 433     float apt_w[max_ori_samples];
 434     int i, j, k, nangle0 = 0, N;
 435     int nthreads = cvGetNumThreads();
 436
 437     CV_Assert(img != 0);
 438     CV_Assert(CV_MAT_TYPE(img->type) == CV_8UC1);
 439     CV_Assert(mask == 0 || (CV_ARE_SIZES_EQ(img,mask) && CV_MAT_TYPE(mask->type) == CV_8UC1));
 440     CV_Assert(storage != 0);
 441     CV_Assert(params.hessianThreshold >= 0);
 442     CV_Assert(params.nOctaves > 0);
 443     CV_Assert(params.nOctaveLayers > 0);
 444
 445     sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
 446     cvIntegral( img, sum );
 447     if( mask )
 448     {
 449         mask1 = cvCreateMat( img->height, img->width, CV_8UC1 );
 450         mask_sum = cvCreateMat( img->height+1, img->width+1, CV_32SC1 );
 451         cvMinS( mask, 1, mask1 );
 452         cvIntegral( mask1, mask_sum );
 453     }
 454     keypoints = icvFastHessianDetector( sum, mask_sum, storage, &params );
 455     N = keypoints->total;
 456     if( _descriptors )
 457     {
 458         descriptors = cvCreateSeq( 0, sizeof(CvSeq),
 459             descriptor_size*CV_ELEM_SIZE(descriptor_data_type), storage );
 460         cvSeqPushMulti( descriptors, 0, N );
 461     }
 462
 463     /* Coordinates and weights of samples used to calculate orientation */
 464     cv::Mat _G = cv::getGaussianKernel( 2*ORI_RADIUS+1, ORI_SIGMA, CV_32F );
 465     const float* G = (const float*)_G.data;
 466
 467     for( i = -ORI_RADIUS; i <= ORI_RADIUS; i++ )
 468     {
 469         for( j = -ORI_RADIUS; j <= ORI_RADIUS; j++ )
 470         {
 471             if( i*i + j*j <= ORI_RADIUS*ORI_RADIUS )
 472             {
 473                 apt[nangle0] = cvPoint(j,i);
 474                 apt_w[nangle0++] = G[i+ORI_RADIUS]*G[j+ORI_RADIUS];
 475             }
 476         }
 477     }
 478
 479     /* Gaussian used to weight descriptor samples */
 480     {
 481     double c2 = 1./(DESC_SIGMA*DESC_SIGMA*2);
 482     double gs = 0;
 483     for( i = 0; i < PATCH_SZ; i++ )
 484     {
 485         for( j = 0; j < PATCH_SZ; j++ )
 486         {
 487             double x = j - (float)(PATCH_SZ-1)/2, y = i - (float)(PATCH_SZ-1)/2;
 488             double val = exp(-(x*x+y*y)*c2);
 489             DW[i][j] = (float)val;
 490             gs += val;
 491         }
 492     }
 493     cvScale( &_DW, &_DW, 1./gs );
 494     }
 495
 496     win_bufs = (CvMat**)cvAlloc(nthreads*sizeof(win_bufs[0]));
 497     for( i = 0; i < nthreads; i++ )
 498         win_bufs[i] = 0;
 499
 500 #ifdef _OPENMP
 501 #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
 502 #endif
 503     for( k = 0; k < N; k++ )
 504     {
 505         const int* sum_ptr = sum->data.i;
 506         int sum_cols = sum->cols;
 507         int i, j, kk, x, y, nangle;
 508         float X[max_ori_samples], Y[max_ori_samples], angle[max_ori_samples];
 509         uchar PATCH[PATCH_SZ+1][PATCH_SZ+1];
 510         float DX[PATCH_SZ][PATCH_SZ], DY[PATCH_SZ][PATCH_SZ];
 511         CvMat _X = cvMat(1, max_ori_samples, CV_32F, X);
 512         CvMat _Y = cvMat(1, max_ori_samples, CV_32F, Y);
 513         CvMat _angle = cvMat(1, max_ori_samples, CV_32F, angle);
 514         CvMat _patch = cvMat(PATCH_SZ+1, PATCH_SZ+1, CV_8U, PATCH);
 515         float* vec;
 516         CvSurfHF dx_t[NX], dy_t[NY];
 517         int thread_idx = cvGetThreadNum();
 518
 519         CvSURFPoint* kp = (CvSURFPoint*)cvGetSeqElem( keypoints, k );
 520         int size = kp->size;
 521         CvPoint2D32f center = kp->pt;
 522
 523         /* The sampling intervals and wavelet sized for selecting an orientation
 524            and building the keypoint descriptor are defined relative to 's' */
 525         float s = (float)size*1.2f/9.0f;
 526
 527         /* To find the dominant orientation, the gradients in x and y are
 528            sampled in a circle of radius 6s using wavelets of size 4s.
 529            We ensure the gradient wavelet size is even to ensure the
 530            wavelet pattern is balanced and symmetric around its center */
 531         int grad_wav_size = 2*cvRound( 2*s );
 532         if ( sum->rows < grad_wav_size || sum->cols < grad_wav_size )
 533         {
 534             /* when grad_wav_size is too big,
 535              * the sampling of gradient will be meaningless
 536              * mark keypoint for deletion. */
 537             kp->size = -1;
 538             continue;
 539         }
 540         icvResizeHaarPattern( dx_s, dx_t, NX, 4, grad_wav_size, sum->cols );
 541         icvResizeHaarPattern( dy_s, dy_t, NY, 4, grad_wav_size, sum->cols );
 542         for( kk = 0, nangle = 0; kk < nangle0; kk++ )
 543         {
 544             const int* ptr;
 545             float vx, vy;
 546             x = cvRound( center.x + apt[kk].x*s - (float)(grad_wav_size-1)/2 );
 547             y = cvRound( center.y + apt[kk].y*s - (float)(grad_wav_size-1)/2 );
 548             if( (unsigned)y >= (unsigned)(sum->rows - grad_wav_size) ||
 549                 (unsigned)x >= (unsigned)(sum->cols - grad_wav_size) )
 550                 continue;
 551             ptr = sum_ptr + x + y*sum_cols;
 552             vx = icvCalcHaarPattern( ptr, dx_t, 2 );
 553             vy = icvCalcHaarPattern( ptr, dy_t, 2 );
 554             X[nangle] = vx*apt_w[kk]; Y[nangle] = vy*apt_w[kk];
 555             nangle++;
 556         }
 557         if ( nangle == 0 )
 558         {
 559             /* No gradient could be sampled because the keypoint is too
 560              * near too one or more of the sides of the image. As we
 561              * therefore cannot find a dominant direction, we skip this
 562              * keypoint and mark it for later deletion from the sequence. */
 563             kp->size = -1;
 564             continue;
 565         }
 566         _X.cols = _Y.cols = _angle.cols = nangle;
 567         cvCartToPolar( &_X, &_Y, 0, &_angle, 1 );
 568
 569         float bestx = 0, besty = 0, descriptor_mod = 0;
 570         for( i = 0; i < 360; i += ORI_SEARCH_INC )
 571         {
 572             float sumx = 0, sumy = 0, temp_mod;
 573             for( j = 0; j < nangle; j++ )
 574             {
 575                 int d = abs(cvRound(angle[j]) - i);
 576                 if( d < ORI_WIN/2 || d > 360-ORI_WIN/2 )
 577                 {
 578                     sumx += X[j];
 579                     sumy += Y[j];
 580                 }
 581             }
 582             temp_mod = sumx*sumx + sumy*sumy;
 583             if( temp_mod > descriptor_mod )
 584             {
 585                 descriptor_mod = temp_mod;
 586                 bestx = sumx;
 587                 besty = sumy;
 588             }
 589         }
 590
 591         float descriptor_dir = cvFastArctan( besty, bestx );
 592         kp->dir = descriptor_dir;
 593
 594         if( !_descriptors )
 595             continue;
 596
 597         descriptor_dir *= (float)(CV_PI/180);
 598
 599         /* Extract a window of pixels around the keypoint of size 20s */
 600         int win_size = (int)((PATCH_SZ+1)*s);
 601         if( win_bufs[thread_idx] == 0 || win_bufs[thread_idx]->cols < win_size*win_size )
 602         {
 603             cvReleaseMat( &win_bufs[thread_idx] );
 604             win_bufs[thread_idx] = cvCreateMat( 1, win_size*win_size, CV_8U );
 605         }
 606
 607         CvMat win = cvMat(win_size, win_size, CV_8U, win_bufs[thread_idx]->data.ptr);
 608         float sin_dir = sin(descriptor_dir);
 609         float cos_dir = cos(descriptor_dir) ;
 610
 611         /* Subpixel interpolation version (slower). Subpixel not required since
 612            the pixels will all get averaged when we scale down to 20 pixels */
 613         /*
 614         float w[] = { cos_dir, sin_dir, center.x,
 615                       -sin_dir, cos_dir , center.y };
 616         CvMat W = cvMat(2, 3, CV_32F, w);
 617         cvGetQuadrangleSubPix( img, &win, &W );
 618         */
 619
 620         /* Nearest neighbour version (faster) */
 621         float win_offset = -(float)(win_size-1)/2;
 622         float start_x = center.x + win_offset*cos_dir + win_offset*sin_dir;
 623         float start_y = center.y - win_offset*sin_dir + win_offset*cos_dir;
 624         uchar* WIN = win.data.ptr;
 625         for( i=0; i<win_size; i++, start_x+=sin_dir, start_y+=cos_dir )
 626         {
 627             float pixel_x = start_x;
 628             float pixel_y = start_y;
 629             for( j=0; j<win_size; j++, pixel_x+=cos_dir, pixel_y-=sin_dir )
 630             {
 631                 int x = cvRound( pixel_x );
 632                 int y = cvRound( pixel_y );
 633                 x = MAX( x, 0 );
 634                 y = MAX( y, 0 );
 635                 x = MIN( x, img->cols-1 );
 636                 y = MIN( y, img->rows-1 );
 637                 WIN[i*win_size + j] = img->data.ptr[y*img->step+x];
 638              }
 639         }
 640
 641         /* Scale the window to size PATCH_SZ so each pixel's size is s. This
 642            makes calculating the gradients with wavelets of size 2s easy */
 643         cvResize( &win, &_patch, CV_INTER_AREA );
 644
 645         /* Calculate gradients in x and y with wavelets of size 2s */
 646         for( i = 0; i < PATCH_SZ; i++ )
 647             for( j = 0; j < PATCH_SZ; j++ )
 648             {
 649                 float dw = DW[i][j];
 650                 float vx = (PATCH[i][j+1] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i+1][j])*dw;
 651                 float vy = (PATCH[i+1][j] - PATCH[i][j] + PATCH[i+1][j+1] - PATCH[i][j+1])*dw;
 652                 DX[i][j] = vx;
 653                 DY[i][j] = vy;
 654             }
 655
 656         /* Construct the descriptor */
 657         vec = (float*)cvGetSeqElem( descriptors, k );
 658         for( kk = 0; kk < (int)(descriptors->elem_size/sizeof(vec[0])); kk++ )
 659             vec[kk] = 0;
 660         double square_mag = 0;
 661         if( params.extended )
 662         {
 663             /* 128-bin descriptor */
 664             for( i = 0; i < 4; i++ )
 665                 for( j = 0; j < 4; j++ )
 666                 {
 667                     for( y = i*5; y < i*5+5; y++ )
 668                     {
 669                         for( x = j*5; x < j*5+5; x++ )
 670                         {
 671                             float tx = DX[y][x], ty = DY[y][x];
 672                             if( ty >= 0 )
 673                             {
 674                                 vec[0] += tx;
 675                                 vec[1] += (float)fabs(tx);
 676                             } else {
 677                                 vec[2] += tx;
 678                                 vec[3] += (float)fabs(tx);
 679                             }
 680                             if ( tx >= 0 )
 681                             {
 682                                 vec[4] += ty;
 683                                 vec[5] += (float)fabs(ty);
 684                             } else {
 685                                 vec[6] += ty;
 686                                 vec[7] += (float)fabs(ty);
 687                             }
 688                         }
 689                     }
 690                     for( kk = 0; kk < 8; kk++ )
 691                         square_mag += vec[kk]*vec[kk];
 692                     vec += 8;
 693                 }
 694         }
 695         else
 696         {
 697             /* 64-bin descriptor */
 698             for( i = 0; i < 4; i++ )
 699                 for( j = 0; j < 4; j++ )
 700                 {
 701                     for( y = i*5; y < i*5+5; y++ )
 702                     {
 703                         for( x = j*5; x < j*5+5; x++ )
 704                         {
 705                             float tx = DX[y][x], ty = DY[y][x];
 706                             vec[0] += tx; vec[1] += ty;
 707                             vec[2] += (float)fabs(tx); vec[3] += (float)fabs(ty);
 708                         }
 709                     }
 710                     for( kk = 0; kk < 4; kk++ )
 711                         square_mag += vec[kk]*vec[kk];
 712                     vec+=4;
 713                 }
 714         }
 715
 716         /* unit vector is essential for contrast invariance */
 717         vec = (float*)cvGetSeqElem( descriptors, k );
 718         double scale = 1./(sqrt(square_mag) + DBL_EPSILON);
 719         for( kk = 0; kk < descriptor_size; kk++ )
 720             vec[kk] = (float)(vec[kk]*scale);
 721     }
 722
 723     /* remove keypoints that were marked for deletion */
 724     for ( i = 0; i < N; i++ )
 725     {
 726         CvSURFPoint* kp = (CvSURFPoint*)cvGetSeqElem( keypoints, i );
 727         if ( kp->size == -1 )
 728         {
 729             cvSeqRemove( keypoints, i );
 730             if ( _descriptors )
 731                 cvSeqRemove( descriptors, i );
 732             k--;
 733             N--;
 734         }
 735     }
 736
 737     for( i = 0; i < nthreads; i++ )
 738         cvReleaseMat( &win_bufs[i] );
 739
 740     if( _keypoints )
 741         *_keypoints = keypoints;
 742     if( _descriptors )
 743         *_descriptors = descriptors;
 744
 745     cvReleaseMat( &sum );
 746     cvReleaseMat( &mask1 );
 747     cvReleaseMat( &mask_sum );
 748     cvFree( &win_bufs );
 749 }
 750