cmake/FindCUDA.cmake

   1 #.rst:
   2 # FindCUDA
   3 # --------
   4 #
   5 # Tools for building CUDA C files: libraries and build dependencies.
   6 #
   7 # This script locates the NVIDIA CUDA C tools.  It should work on linux,
   8 # windows, and mac and should be reasonably up to date with CUDA C
   9 # releases.
  10 #
  11 # This script makes use of the standard find_package arguments of
  12 # <VERSION>, REQUIRED and QUIET.  CUDA_FOUND will report if an
  13 # acceptable version of CUDA was found.
  14 #
  15 # The script will prompt the user to specify CUDA_TOOLKIT_ROOT_DIR if
  16 # the prefix cannot be determined by the location of nvcc in the system
  17 # path and REQUIRED is specified to find_package().  To use a different
  18 # installed version of the toolkit set the environment variable
  19 # CUDA_BIN_PATH before running cmake (e.g.
  20 # CUDA_BIN_PATH=/usr/local/cuda1.0 instead of the default
  21 # /usr/local/cuda) or set CUDA_TOOLKIT_ROOT_DIR after configuring.  If
  22 # you change the value of CUDA_TOOLKIT_ROOT_DIR, various components that
  23 # depend on the path will be relocated.
  24 #
  25 # It might be necessary to set CUDA_TOOLKIT_ROOT_DIR manually on certain
  26 # platforms, or to use a cuda runtime not installed in the default
  27 # location.  In newer versions of the toolkit the cuda library is
  28 # included with the graphics driver- be sure that the driver version
  29 # matches what is needed by the cuda runtime version.
  30 #
  31 # The following variables affect the behavior of the macros in the
  32 # script (in alphabetical order).  Note that any of these flags can be
  33 # changed multiple times in the same directory before calling
  34 # CUDA_ADD_EXECUTABLE, CUDA_ADD_LIBRARY, CUDA_COMPILE, CUDA_COMPILE_PTX,
  35 # CUDA_COMPILE_FATBIN, CUDA_COMPILE_CUBIN or CUDA_WRAP_SRCS::
  36 #
  37 #   CUDA_64_BIT_DEVICE_CODE (Default matches host bit size)
  38 #   -- Set to ON to compile for 64 bit device code, OFF for 32 bit device code.
  39 #      Note that making this different from the host code when generating object
  40 #      or C files from CUDA code just won't work, because size_t gets defined by
  41 #      nvcc in the generated source.  If you compile to PTX and then load the
  42 #      file yourself, you can mix bit sizes between device and host.
  43 #
  44 #   CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE (Default ON)
  45 #   -- Set to ON if you want the custom build rule to be attached to the source
  46 #      file in Visual Studio.  Turn OFF if you add the same cuda file to multiple
  47 #      targets.
  48 #
  49 #      This allows the user to build the target from the CUDA file; however, bad
  50 #      things can happen if the CUDA source file is added to multiple targets.
  51 #      When performing parallel builds it is possible for the custom build
  52 #      command to be run more than once and in parallel causing cryptic build
  53 #      errors.  VS runs the rules for every source file in the target, and a
  54 #      source can have only one rule no matter how many projects it is added to.
  55 #      When the rule is run from multiple targets race conditions can occur on
  56 #      the generated file.  Eventually everything will get built, but if the user
  57 #      is unaware of this behavior, there may be confusion.  It would be nice if
  58 #      this script could detect the reuse of source files across multiple targets
  59 #      and turn the option off for the user, but no good solution could be found.
  60 #
  61 #   CUDA_BUILD_CUBIN (Default OFF)
  62 #   -- Set to ON to enable an extra compilation pass with the -cubin option in
  63 #      Device mode. The output is parsed and register, shared memory usage is
  64 #      printed during build.
  65 #
  66 #   CUDA_BUILD_EMULATION (Default OFF for device mode)
  67 #   -- Set to ON for Emulation mode. -D_DEVICEEMU is defined for CUDA C files
  68 #      when CUDA_BUILD_EMULATION is TRUE.
  69 #
  70 #   CUDA_GENERATED_OUTPUT_DIR (Default CMAKE_CURRENT_BINARY_DIR)
  71 #   -- Set to the path you wish to have the generated files placed.  If it is
  72 #      blank output files will be placed in CMAKE_CURRENT_BINARY_DIR.
  73 #      Intermediate files will always be placed in
  74 #      CMAKE_CURRENT_BINARY_DIR/CMakeFiles.
  75 #
  76 #   CUDA_HOST_COMPILATION_CPP (Default ON)
  77 #   -- Set to OFF for C compilation of host code.
  78 #
  79 #   CUDA_HOST_COMPILER (Default CMAKE_C_COMPILER, $(VCInstallDir)/bin for VS)
  80 #   -- Set the host compiler to be used by nvcc.  Ignored if -ccbin or
  81 #      --compiler-bindir is already present in the CUDA_NVCC_FLAGS or
  82 #      CUDA_NVCC_FLAGS_<CONFIG> variables.  For Visual Studio targets
  83 #      $(VCInstallDir)/bin is a special value that expands out to the path when
  84 #      the command is run from within VS.
  85 #
  86 #   CUDA_NVCC_FLAGS
  87 #   CUDA_NVCC_FLAGS_<CONFIG>
  88 #   -- Additional NVCC command line arguments.  NOTE: multiple arguments must be
  89 #      semi-colon delimited (e.g. --compiler-options;-Wall)
  90 #
  91 #   CUDA_PROPAGATE_HOST_FLAGS (Default ON)
  92 #   -- Set to ON to propagate CMAKE_{C,CXX}_FLAGS and their configuration
  93 #      dependent counterparts (e.g. CMAKE_C_FLAGS_DEBUG) automatically to the
  94 #      host compiler through nvcc's -Xcompiler flag.  This helps make the
  95 #      generated host code match the rest of the system better.  Sometimes
  96 #      certain flags give nvcc problems, and this will help you turn the flag
  97 #      propagation off.  This does not affect the flags supplied directly to nvcc
  98 #      via CUDA_NVCC_FLAGS or through the OPTION flags specified through
  99 #      CUDA_ADD_LIBRARY, CUDA_ADD_EXECUTABLE, or CUDA_WRAP_SRCS.  Flags used for
 100 #      shared library compilation are not affected by this flag.
 101 #
 102 #   CUDA_SEPARABLE_COMPILATION (Default OFF)
 103 #   -- If set this will enable separable compilation for all CUDA runtime object
 104 #      files.  If used outside of CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY
 105 #      (e.g. calling CUDA_WRAP_SRCS directly),
 106 #      CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME and
 107 #      CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS should be called.
 108 #
 109 #   CUDA_SOURCE_PROPERTY_FORMAT
 110 #   -- If this source file property is set, it can override the format specified
 111 #      to CUDA_WRAP_SRCS (OBJ, PTX, CUBIN, or FATBIN).  If an input source file
 112 #      is not a .cu file, setting this file will cause it to be treated as a .cu
 113 #      file. See documentation for set_source_files_properties on how to set
 114 #      this property.
 115 #
 116 #   CUDA_USE_STATIC_CUDA_RUNTIME (Default ON)
 117 #   -- When enabled the static version of the CUDA runtime library will be used
 118 #      in CUDA_LIBRARIES.  If the version of CUDA configured doesn't support
 119 #      this option, then it will be silently disabled.
 120 #
 121 #   CUDA_VERBOSE_BUILD (Default OFF)
 122 #   -- Set to ON to see all the commands used when building the CUDA file.  When
 123 #      using a Makefile generator the value defaults to VERBOSE (run make
 124 #      VERBOSE=1 to see output), although setting CUDA_VERBOSE_BUILD to ON will
 125 #      always print the output.
 126 #
 127 # The script creates the following macros (in alphabetical order)::
 128 #
 129 #   CUDA_ADD_CUFFT_TO_TARGET( cuda_target )
 130 #   -- Adds the cufft library to the target (can be any target).  Handles whether
 131 #      you are in emulation mode or not.
 132 #
 133 #   CUDA_ADD_CUBLAS_TO_TARGET( cuda_target )
 134 #   -- Adds the cublas library to the target (can be any target).  Handles
 135 #      whether you are in emulation mode or not.
 136 #
 137 #   CUDA_ADD_EXECUTABLE( cuda_target file0 file1 ...
 138 #                        [WIN32] [MACOSX_BUNDLE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
 139 #   -- Creates an executable "cuda_target" which is made up of the files
 140 #      specified.  All of the non CUDA C files are compiled using the standard
 141 #      build rules specified by CMAKE and the cuda files are compiled to object
 142 #      files using nvcc and the host compiler.  In addition CUDA_INCLUDE_DIRS is
 143 #      added automatically to include_directories().  Some standard CMake target
 144 #      calls can be used on the target after calling this macro
 145 #      (e.g. set_target_properties and target_link_libraries), but setting
 146 #      properties that adjust compilation flags will not affect code compiled by
 147 #      nvcc.  Such flags should be modified before calling CUDA_ADD_EXECUTABLE,
 148 #      CUDA_ADD_LIBRARY or CUDA_WRAP_SRCS.
 149 #
 150 #   CUDA_ADD_LIBRARY( cuda_target file0 file1 ...
 151 #                     [STATIC | SHARED | MODULE] [EXCLUDE_FROM_ALL] [OPTIONS ...] )
 152 #   -- Same as CUDA_ADD_EXECUTABLE except that a library is created.
 153 #
 154 #   CUDA_BUILD_CLEAN_TARGET()
 155 #   -- Creates a convenience target that deletes all the dependency files
 156 #      generated.  You should make clean after running this target to ensure the
 157 #      dependency files get regenerated.
 158 #
 159 #   CUDA_COMPILE( generated_files file0 file1 ... [STATIC | SHARED | MODULE]
 160 #                 [OPTIONS ...] )
 161 #   -- Returns a list of generated files from the input source files to be used
 162 #      with ADD_LIBRARY or ADD_EXECUTABLE.
 163 #
 164 #   CUDA_COMPILE_PTX( generated_files file0 file1 ... [OPTIONS ...] )
 165 #   -- Returns a list of PTX files generated from the input source files.
 166 #
 167 #   CUDA_COMPILE_FATBIN( generated_files file0 file1 ... [OPTIONS ...] )
 168 #   -- Returns a list of FATBIN files generated from the input source files.
 169 #
 170 #   CUDA_COMPILE_CUBIN( generated_files file0 file1 ... [OPTIONS ...] )
 171 #   -- Returns a list of CUBIN files generated from the input source files.
 172 #
 173 #   CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME( output_file_var
 174 #                                                        cuda_target
 175 #                                                        object_files )
 176 #   -- Compute the name of the intermediate link file used for separable
 177 #      compilation.  This file name is typically passed into
 178 #      CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS.  output_file_var is produced
 179 #      based on cuda_target and the list of object files that need separable
 180 #      compilation as specified by object_files.  If the object_files list is
 181 #      empty, then output_file_var will be empty.  This function is called
 182 #      automatically for CUDA_ADD_LIBRARY and CUDA_ADD_EXECUTABLE.  Note that
 183 #      this is a function and not a macro.
 184 #
 185 #   CUDA_INCLUDE_DIRECTORIES( path0 path1 ... )
 186 #   -- Sets the directories that should be passed to nvcc
 187 #      (e.g. nvcc -Ipath0 -Ipath1 ... ). These paths usually contain other .cu
 188 #      files.
 189 #
 190 #
 191 #   CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS( output_file_var cuda_target
 192 #                                            nvcc_flags object_files)
 193 #   -- Generates the link object required by separable compilation from the given
 194 #      object files.  This is called automatically for CUDA_ADD_EXECUTABLE and
 195 #      CUDA_ADD_LIBRARY, but can be called manually when using CUDA_WRAP_SRCS
 196 #      directly.  When called from CUDA_ADD_LIBRARY or CUDA_ADD_EXECUTABLE the
 197 #      nvcc_flags passed in are the same as the flags passed in via the OPTIONS
 198 #      argument.  The only nvcc flag added automatically is the bitness flag as
 199 #      specified by CUDA_64_BIT_DEVICE_CODE.  Note that this is a function
 200 #      instead of a macro.
 201 #
 202 #   CUDA_SELECT_NVCC_ARCH_FLAGS(out_variable [target_CUDA_architectures])
 203 #   -- Selects GPU arch flags for nvcc based on target_CUDA_architectures
 204 #      target_CUDA_architectures : Auto | Common | All | LIST(ARCH_AND_PTX ...)
 205 #       - "Auto" detects local machine GPU compute arch at runtime.
 206 #       - "Common" and "All" cover common and entire subsets of architectures
 207 #      ARCH_AND_PTX : NAME | NUM.NUM | NUM.NUM(NUM.NUM) | NUM.NUM+PTX
 208 #      NAME: Fermi Kepler Maxwell Kepler+Tegra Kepler+Tesla Maxwell+Tegra Pascal
 209 #      NUM: Any number. Only those pairs are currently accepted by NVCC though:
 210 #            2.0 2.1 3.0 3.2 3.5 3.7 5.0 5.2 5.3 6.0 6.2
 211 #      Returns LIST of flags to be added to CUDA_NVCC_FLAGS in ${out_variable}
 212 #      Additionally, sets ${out_variable}_readable to the resulting numeric list
 213 #      Example:
 214 #       CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS 3.0 3.5+PTX 5.2(5.0) Maxwell)
 215 #        LIST(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
 216 #
 217 #      More info on CUDA architectures: https://en.wikipedia.org/wiki/CUDA
 218 #      Note that this is a function instead of a macro.
 219 #
 220 #   CUDA_WRAP_SRCS ( cuda_target format generated_files file0 file1 ...
 221 #                    [STATIC | SHARED | MODULE] [OPTIONS ...] )
 222 #   -- This is where all the magic happens.  CUDA_ADD_EXECUTABLE,
 223 #      CUDA_ADD_LIBRARY, CUDA_COMPILE, and CUDA_COMPILE_PTX all call this
 224 #      function under the hood.
 225 #
 226 #      Given the list of files (file0 file1 ... fileN) this macro generates
 227 #      custom commands that generate either PTX or linkable objects (use "PTX" or
 228 #      "OBJ" for the format argument to switch).  Files that don't end with .cu
 229 #      or have the HEADER_FILE_ONLY property are ignored.
 230 #
 231 #      The arguments passed in after OPTIONS are extra command line options to
 232 #      give to nvcc.  You can also specify per configuration options by
 233 #      specifying the name of the configuration followed by the options.  General
 234 #      options must precede configuration specific options.  Not all
 235 #      configurations need to be specified, only the ones provided will be used.
 236 #
 237 #         OPTIONS -DFLAG=2 "-DFLAG_OTHER=space in flag"
 238 #         DEBUG -g
 239 #         RELEASE --use_fast_math
 240 #         RELWITHDEBINFO --use_fast_math;-g
 241 #         MINSIZEREL --use_fast_math
 242 #
 243 #      For certain configurations (namely VS generating object files with
 244 #      CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE set to ON), no generated file will
 245 #      be produced for the given cuda file.  This is because when you add the
 246 #      cuda file to Visual Studio it knows that this file produces an object file
 247 #      and will link in the resulting object file automatically.
 248 #
 249 #      This script will also generate a separate cmake script that is used at
 250 #      build time to invoke nvcc.  This is for several reasons.
 251 #
 252 #        1. nvcc can return negative numbers as return values which confuses
 253 #        Visual Studio into thinking that the command succeeded.  The script now
 254 #        checks the error codes and produces errors when there was a problem.
 255 #
 256 #        2. nvcc has been known to not delete incomplete results when it
 257 #        encounters problems.  This confuses build systems into thinking the
 258 #        target was generated when in fact an unusable file exists.  The script
 259 #        now deletes the output files if there was an error.
 260 #
 261 #        3. By putting all the options that affect the build into a file and then
 262 #        make the build rule dependent on the file, the output files will be
 263 #        regenerated when the options change.
 264 #
 265 #      This script also looks at optional arguments STATIC, SHARED, or MODULE to
 266 #      determine when to target the object compilation for a shared library.
 267 #      BUILD_SHARED_LIBS is ignored in CUDA_WRAP_SRCS, but it is respected in
 268 #      CUDA_ADD_LIBRARY.  On some systems special flags are added for building
 269 #      objects intended for shared libraries.  A preprocessor macro,
 270 #      <target_name>_EXPORTS is defined when a shared library compilation is
 271 #      detected.
 272 #
 273 #      Flags passed into add_definitions with -D or /D are passed along to nvcc.
 274 #
 275 #
 276 #
 277 # The script defines the following variables::
 278 #
 279 #   CUDA_VERSION_MAJOR    -- The major version of cuda as reported by nvcc.
 280 #   CUDA_VERSION_MINOR    -- The minor version.
 281 #   CUDA_VERSION
 282 #   CUDA_VERSION_STRING   -- CUDA_VERSION_MAJOR.CUDA_VERSION_MINOR
 283 #   CUDA_HAS_FP16         -- Whether a short float (float16,fp16) is supported.
 284 #
 285 #   CUDA_TOOLKIT_ROOT_DIR -- Path to the CUDA Toolkit (defined if not set).
 286 #   CUDA_SDK_ROOT_DIR     -- Path to the CUDA SDK.  Use this to find files in the
 287 #                            SDK.  This script will not directly support finding
 288 #                            specific libraries or headers, as that isn't
 289 #                            supported by NVIDIA.  If you want to change
 290 #                            libraries when the path changes see the
 291 #                            FindCUDA.cmake script for an example of how to clear
 292 #                            these variables.  There are also examples of how to
 293 #                            use the CUDA_SDK_ROOT_DIR to locate headers or
 294 #                            libraries, if you so choose (at your own risk).
 295 #   CUDA_INCLUDE_DIRS     -- Include directory for cuda headers.  Added automatically
 296 #                            for CUDA_ADD_EXECUTABLE and CUDA_ADD_LIBRARY.
 297 #   CUDA_LIBRARIES        -- Cuda RT library.
 298 #   CUDA_CUFFT_LIBRARIES  -- Device or emulation library for the Cuda FFT
 299 #                            implementation (alternative to:
 300 #                            CUDA_ADD_CUFFT_TO_TARGET macro)
 301 #   CUDA_CUBLAS_LIBRARIES -- Device or emulation library for the Cuda BLAS
 302 #                            implementation (alternative to:
 303 #                            CUDA_ADD_CUBLAS_TO_TARGET macro).
 304 #   CUDA_cudart_static_LIBRARY -- Statically linkable cuda runtime library.
 305 #                                 Only available for CUDA version 5.5+
 306 #   CUDA_cudadevrt_LIBRARY -- Device runtime library.
 307 #                             Required for separable compilation.
 308 #   CUDA_cupti_LIBRARY    -- CUDA Profiling Tools Interface library.
 309 #                            Only available for CUDA version 4.0+.
 310 #   CUDA_curand_LIBRARY   -- CUDA Random Number Generation library.
 311 #                            Only available for CUDA version 3.2+.
 312 #   CUDA_cusolver_LIBRARY -- CUDA Direct Solver library.
 313 #                            Only available for CUDA version 7.0+.
 314 #   CUDA_cusparse_LIBRARY -- CUDA Sparse Matrix library.
 315 #                            Only available for CUDA version 3.2+.
 316 #   CUDA_npp_LIBRARY      -- NVIDIA Performance Primitives lib.
 317 #                            Only available for CUDA version 4.0+.
 318 #   CUDA_nppc_LIBRARY     -- NVIDIA Performance Primitives lib (core).
 319 #                            Only available for CUDA version 5.5+.
 320 #   CUDA_nppi_LIBRARY     -- NVIDIA Performance Primitives lib (image processing).
 321 #                            Only available for CUDA version 5.5+.
 322 #   CUDA_npps_LIBRARY     -- NVIDIA Performance Primitives lib (signal processing).
 323 #                            Only available for CUDA version 5.5+.
 324 #   CUDA_nvcuvenc_LIBRARY -- CUDA Video Encoder library.
 325 #                            Only available for CUDA version 3.2+.
 326 #                            Windows only.
 327 #   CUDA_nvcuvid_LIBRARY  -- CUDA Video Decoder library.
 328 #                            Only available for CUDA version 3.2+.
 329 #                            Windows only.
 330 #
 331
 332 #   James Bigler, NVIDIA Corp (nvidia.com - jbigler)
 333 #   Abe Stephens, SCI Institute -- http://www.sci.utah.edu/~abe/FindCuda.html
 334 #
 335 #   Copyright (c) 2008 - 2009 NVIDIA Corporation.  All rights reserved.
 336 #
 337 #   Copyright (c) 2007-2009
 338 #   Scientific Computing and Imaging Institute, University of Utah
 339 #
 340 #   This code is licensed under the MIT License.  See the FindCUDA.cmake script
 341 #   for the text of the license.
 342
 343 # The MIT License
 344 #
 345 # License for the specific language governing rights and limitations under
 346 # Permission is hereby granted, free of charge, to any person obtaining a
 347 # copy of this software and associated documentation files (the "Software"),
 348 # to deal in the Software without restriction, including without limitation
 349 # the rights to use, copy, modify, merge, publish, distribute, sublicense,
 350 # and/or sell copies of the Software, and to permit persons to whom the
 351 # Software is furnished to do so, subject to the following conditions:
 352 #
 353 # The above copyright notice and this permission notice shall be included
 354 # in all copies or substantial portions of the Software.
 355 #
 356 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 357 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 358 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 359 # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 360 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 361 # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 362 # DEALINGS IN THE SOFTWARE.
 363 #
 364 ###############################################################################
 365
 366 # FindCUDA.cmake
 367
 # CUDA_FIND_HELPER_FILE(_name _extension)
 #
 # Computes the full path of the helper file "<_name>.<_extension>" expected in
 # the FindCUDA/ directory next to the list file currently being processed, and
 # stores it in CUDA_<_name>.
 #
 #   _name      -- base name of the helper file; also the suffix of the result
 #                 variable CUDA_<_name>.
 #   _extension -- file extension without the leading dot.
 #
 # When the file does not exist, the macro raises a FATAL_ERROR if
 # CUDA_FIND_REQUIRED is set; otherwise it prints a STATUS message unless
 # CUDA_FIND_QUIETLY is set.  CUDA_<_name> is written to the cache as an
 # INTERNAL entry whether or not the file exists.
 macro(CUDA_FIND_HELPER_FILE _name _extension)
   set(_full_name "${_name}.${_extension}")
   # The directory part of CMAKE_CURRENT_LIST_FILE is where the FindCUDA/
   # helper directory is expected to live.
   get_filename_component(CMAKE_CURRENT_LIST_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
   set(CUDA_${_name} "${CMAKE_CURRENT_LIST_DIR}/FindCUDA/${_full_name}")
   if(NOT EXISTS "${CUDA_${_name}}")
     set(error_message "${_full_name} not found in ${CMAKE_CURRENT_LIST_DIR}/FindCUDA")
     if(CUDA_FIND_REQUIRED)
       message(FATAL_ERROR "${error_message}")
     elseif(NOT CUDA_FIND_QUIETLY)
       message(STATUS "${error_message}")
     endif()
   endif()
   # Cache the location as INTERNAL so it is not presented to the user.
   set(CUDA_${_name} "${CUDA_${_name}}" CACHE INTERNAL "Location of ${_full_name}" FORCE)
 endmacro()
 389
 #####################################################################
 ## CUDA_INCLUDE_NVCC_DEPENDENCIES
 ##

 # CUDA_INCLUDE_NVCC_DEPENDENCIES(dependency_file)
 #
 # Includes the given dependency file and leaves two variables behind:
 #
 #   CUDA_NVCC_DEPEND            -- whatever the included file set it to, or the
 #                                  dependency file itself when regeneration is
 #                                  needed.
 #   CUDA_NVCC_DEPEND_REGENERATE -- TRUE when the dependency information has to
 #                                  be produced again.
 #
 # Regeneration is requested when the included file defines no dependencies,
 # or when any listed dependency no longer exists on disk (for example a
 # header that has been moved or deleted).  In that case the dependency file is
 # reset to a stub containing only a header comment.
 macro(CUDA_INCLUDE_NVCC_DEPENDENCIES dependency_file)
   set(CUDA_NVCC_DEPEND)
   set(CUDA_NVCC_DEPEND_REGENERATE FALSE)

   # include() needs a file to read, so create a stub on first use.  Keep the
   # include(): it is what makes CMake re-run and pick up new dependency
   # information when the file changes.
   if(NOT EXISTS ${dependency_file})
     file(WRITE ${dependency_file} "#FindCUDA.cmake generated file.  Do not edit.\n")
   endif()
   include(${dependency_file})

   if(NOT CUDA_NVCC_DEPEND)
     # The file listed nothing: the dependencies have to be generated.
     set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
   else()
     # One missing file is enough to invalidate the recorded dependencies.
     foreach(f ${CUDA_NVCC_DEPEND})
       if(NOT EXISTS ${f})
         set(CUDA_NVCC_DEPEND_REGENERATE TRUE)
       endif()
     endforeach()
   endif()

   # When regenerating, make the output depend on the dependency file itself
   # so that the build rule runs again, and reset the file to the empty stub.
   if(CUDA_NVCC_DEPEND_REGENERATE)
     set(CUDA_NVCC_DEPEND ${dependency_file})
     file(WRITE ${dependency_file} "#FindCUDA.cmake generated file.  Do not edit.\n")
   endif()

 endmacro()
 454
 ###############################################################################
 ###############################################################################
 # Setup variables' defaults
 ###############################################################################
 ###############################################################################

 # The device-code bitness defaults to the host pointer size (64 bit when
 # CMAKE_SIZEOF_VOID_P is 8, 32 bit otherwise); the user may override it
 # through the CUDA_64_BIT_DEVICE_CODE option.
 set(CUDA_64_BIT_DEVICE_CODE_DEFAULT OFF)
 if(CMAKE_SIZEOF_VOID_P EQUAL 8)
   set(CUDA_64_BIT_DEVICE_CODE_DEFAULT ON)
 endif()
 option(CUDA_64_BIT_DEVICE_CODE
   "Compile device code in 64 bit mode"
   ${CUDA_64_BIT_DEVICE_CODE_DEFAULT})
 468
 # Visual Studio only: attach the custom build rule to the CUDA source file
 # itself.
 option(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
   "Attach the build rule to the CUDA source file.  Enable only when the CUDA source file is added to at most one target."
   ON)

 # Extra pass that prints information about the CUDA file during compilation.
 option(CUDA_BUILD_CUBIN
   "Generate and parse .cubin files in Device mode."
   OFF)

 # Emulation mode versus device mode.
 option(CUDA_BUILD_EMULATION
   "Build in Emulation mode"
   OFF)

 # Destination of generated output files.
 set(CUDA_GENERATED_OUTPUT_DIR ""
   CACHE PATH
   "Directory to put all the output files.  If blank it will default to the CMAKE_CURRENT_BINARY_DIR")

 # Host compilation mode: ON for C++, OFF for C.
 option(CUDA_HOST_COMPILATION_CPP
   "Generated file extension"
   ON)

 # Additional user-supplied nvcc flags.
 set(CUDA_NVCC_FLAGS ""
   CACHE STRING
   "Semi-colon delimit multiple arguments.")
 486
 # Choose the default for CUDA_HOST_COMPILER, the host compiler handed to nvcc.
 if(CMAKE_GENERATOR MATCHES "Visual Studio")
   set(CUDA_HOST_COMPILER "$(VCInstallDir)bin" CACHE FILEPATH "Host side compiler used by NVCC")
 elseif(APPLE
     AND "${CMAKE_C_COMPILER_ID}" MATCHES "Clang"
     AND "${CMAKE_C_COMPILER}" MATCHES "/cc$")
   # On macOS "cc" may be a symlink to clang.  Passing "cc" to nvcc may let it
   # think it is GCC and issue the -dumpspecs option, which clang does not
   # handle.  Symlinks are resolved only in this APPLE + clang case, to avoid
   # following links to wrappers such as ccache.
   #
   # The value stays empty unless CMAKE_C_COMPILER is defined and
   # CUDA_HOST_COMPILER has not been specified; an empty value means -ccbin
   # should be skipped so that nvcc uses its own default C compiler.
   set(c_compiler_realpath "")
   if(DEFINED CMAKE_C_COMPILER AND NOT DEFINED CUDA_HOST_COMPILER)
     get_filename_component(c_compiler_realpath "${CMAKE_C_COMPILER}" REALPATH)
     # If the resolved path is not clang after all, fall back to
     # CMAKE_C_COMPILER as given.
     if(NOT "${c_compiler_realpath}" MATCHES "/clang$")
       set(c_compiler_realpath "${CMAKE_C_COMPILER}")
     endif()
   endif()
   set(CUDA_HOST_COMPILER "${c_compiler_realpath}" CACHE FILEPATH "Host side compiler used by NVCC")
 else()
   set(CUDA_HOST_COMPILER "${CMAKE_C_COMPILER}"
     CACHE FILEPATH "Host side compiler used by NVCC")
 endif()
 516
 # Forward the host compiler flags to the host compiler through nvcc's
 # -Xcompiler flag.
 option(CUDA_PROPAGATE_HOST_FLAGS "Propagate C/CXX_FLAGS and friends to the host compiler via -Xcompiler" ON)

 # Separable compilation for CUDA objects.
 option(CUDA_SEPARABLE_COMPILATION "Compile CUDA objects with separable compilation enabled.  Requires CUDA 5.0+" OFF)

 # Whether the commands used to compile each .cu file are printed.
 option(CUDA_VERBOSE_BUILD "Print out the commands run while compiling the CUDA source file.  With the Makefile generator this defaults to VERBOSE variable specified on the command line, but can be forced on with this option." OFF)

 # Show these settings only in the advanced view of the cache editors.
 mark_as_advanced(
   CUDA_64_BIT_DEVICE_CODE
   CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE
   CUDA_GENERATED_OUTPUT_DIR
   CUDA_HOST_COMPILATION_CPP
   CUDA_NVCC_FLAGS
   CUDA_PROPAGATE_HOST_FLAGS
   CUDA_BUILD_CUBIN
   CUDA_BUILD_EMULATION
   CUDA_VERBOSE_BUILD
   CUDA_SEPARABLE_COMPILATION
   )
 538
 # Create one CUDA_NVCC_FLAGS_<CONFIG> cache entry per configuration.
 #
 # Makefile and similar generators do not define CMAKE_CONFIGURATION_TYPES, so
 # CMAKE_BUILD_TYPE is added to the list as well.  The standard set of four
 # build types (Debug, MinSizeRel, Release and RelWithDebInfo) is appended for
 # completeness.  Running over the combined list accommodates any extra
 # configuration types; duplicate entries are dropped by REMOVE_DUPLICATES.
 set(CUDA_configuration_types
   ${CMAKE_CONFIGURATION_TYPES}
   ${CMAKE_BUILD_TYPE}
   Debug
   MinSizeRel
   Release
   RelWithDebInfo)
 list(REMOVE_DUPLICATES CUDA_configuration_types)
 foreach(config ${CUDA_configuration_types})
   string(TOUPPER ${config} config_upper)
   set(CUDA_NVCC_FLAGS_${config_upper} ""
     CACHE STRING "Semi-colon delimit multiple arguments.")
   mark_as_advanced(CUDA_NVCC_FLAGS_${config_upper})
 endforeach()
 552
 ###############################################################################
 ###############################################################################
 # Locate CUDA, Set Build Type, etc.
 ###############################################################################
 ###############################################################################

 # cuda_unset_include_and_libraries()
 #
 # Removes the cached toolkit include path, the cached CUDA library locations,
 # CUDA_USE_STATIC_CUDA_RUNTIME and CUDA_GPU_DETECT_OUTPUT from the cache.
 #
 # Call this before unsetting CUDA_VERSION: the macro reads CUDA_VERSION to
 # decide whether the emulation runtime entry has to be cleared too.
 macro(cuda_unset_include_and_libraries)
   unset(CUDA_TOOLKIT_INCLUDE CACHE)
   unset(CUDA_CUDART_LIBRARY CACHE)
   unset(CUDA_CUDA_LIBRARY CACHE)
   if(CUDA_VERSION VERSION_EQUAL "3.0")
     # The emulation runtime library existed only in toolkit version 3.0.
     unset(CUDA_CUDARTEMU_LIBRARY CACHE)
   endif()
   # All remaining library entries follow the CUDA_<name>_LIBRARY pattern.
   foreach(_cuda_lib
       cudart_static
       cudadevrt
       cublas
       cublas_device
       cublasemu
       cufft
       cufftemu
       cupti
       curand
       cusolver
       cusparse
       npp
       nppc
       nppi
       npps
       nvcuvenc
       nvcuvid)
     unset(CUDA_${_cuda_lib}_LIBRARY CACHE)
   endforeach()
   unset(CUDA_USE_STATIC_CUDA_RUNTIME CACHE)
   unset(CUDA_GPU_DETECT_OUTPUT CACHE)
 endmacro()
 588
# Check to see if CUDA_TOOLKIT_ROOT_DIR differs from the value recorded in
# CUDA_TOOLKIT_ROOT_DIR_INTERNAL; if it does, clear the dependent cache
# variables so that they will be detected again.
if(NOT "${CUDA_TOOLKIT_ROOT_DIR}" STREQUAL "${CUDA_TOOLKIT_ROOT_DIR_INTERNAL}")
  unset(CUDA_TOOLKIT_TARGET_DIR CACHE)
  unset(CUDA_NVCC_EXECUTABLE CACHE)
  # Keep this call ahead of the CUDA_VERSION unset below: the macro reads
  # CUDA_VERSION to decide what to clear.
  cuda_unset_include_and_libraries()
  unset(CUDA_VERSION CACHE)
endif()

# Likewise, a CUDA_TOOLKIT_TARGET_DIR that differs from its recorded value
# invalidates the cached header and library locations.
if(NOT "${CUDA_TOOLKIT_TARGET_DIR}" STREQUAL "${CUDA_TOOLKIT_TARGET_DIR_INTERNAL}")
  cuda_unset_include_and_libraries()
endif()
 601
 602 #
 603 #  End of unset()
 604 #
 605
 606 #
 607 #  Start looking for things
 608 #
 609
 610 # Search for the cuda distribution.
 611 if(NOT CUDA_TOOLKIT_ROOT_DIR AND NOT CMAKE_CROSSCOMPILING)
 612   # Search in the CUDA_BIN_PATH first.
 613   find_path(CUDA_TOOLKIT_ROOT_DIR
 614     NAMES nvcc nvcc.exe
 615     PATHS
 616       ENV CUDA_TOOLKIT_ROOT
 617       ENV CUDA_PATH
 618       ENV CUDA_BIN_PATH
 619     PATH_SUFFIXES bin bin64
 620     DOC "Toolkit location."
 621     NO_DEFAULT_PATH
 622     )
 623
 624   # Now search default paths
 625   find_path(CUDA_TOOLKIT_ROOT_DIR
 626     NAMES nvcc nvcc.exe
 627     PATHS /opt/cuda/bin
 628           /usr/local/bin
 629           /usr/local/cuda/bin
 630     DOC "Toolkit location."
 631     )
 632
 633   if (CUDA_TOOLKIT_ROOT_DIR)
 634     string(REGEX REPLACE "[/\\\\]?bin[64]*[/\\\\]?$" "" CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR})
 635     # We need to force this back into the cache.
 636     set(CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT_DIR} CACHE PATH "Toolkit location." FORCE)
 637     set(CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR})
 638   endif()
 639
 640   if (NOT EXISTS ${CUDA_TOOLKIT_ROOT_DIR})
 641     if(CUDA_FIND_REQUIRED)
 642       message(FATAL_ERROR "Specify CUDA_TOOLKIT_ROOT_DIR")
 643     elseif(NOT CUDA_FIND_QUIETLY)
 644       message("CUDA_TOOLKIT_ROOT_DIR not found or specified")
 645     endif()
 646   endif ()
 647 endif ()
 648
 649 if(CMAKE_CROSSCOMPILING)
 650   SET (CUDA_TOOLKIT_ROOT $ENV{CUDA_TOOLKIT_ROOT})
 651   if(CMAKE_SYSTEM_PROCESSOR STREQUAL "armv7-a")
 652     # Support for NVPACK
 653     set (CUDA_TOOLKIT_TARGET_NAME "armv7-linux-androideabi")
 654   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm")
 655     # Support for arm cross compilation
 656     set(CUDA_TOOLKIT_TARGET_NAME "armv7-linux-gnueabihf")
 657   elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
 658     # Support for aarch64 cross compilation
 659     if (ANDROID_ARCH_NAME STREQUAL "arm64")
 660       set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux-androideabi")
 661     else()
 662       set(CUDA_TOOLKIT_TARGET_NAME "aarch64-linux")
 663     endif (ANDROID_ARCH_NAME STREQUAL "arm64")
 664   endif()
 665
 666   if (EXISTS "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}")
 667     set(CUDA_TOOLKIT_TARGET_DIR "${CUDA_TOOLKIT_ROOT}/targets/${CUDA_TOOLKIT_TARGET_NAME}" CACHE PATH "CUDA Toolkit target location.")
 668     SET (CUDA_TOOLKIT_ROOT_DIR ${CUDA_TOOLKIT_ROOT})
 669     mark_as_advanced(CUDA_TOOLKIT_TARGET_DIR)
 670   endif()
 671
 672   # add known CUDA targetr root path to the set of directories we search for programs, libraries and headers
 673   set( CMAKE_FIND_ROOT_PATH "${CUDA_TOOLKIT_TARGET_DIR};${CMAKE_FIND_ROOT_PATH}")
 674   macro( cuda_find_host_program )
 675     find_host_program( ${ARGN} )
 676   endmacro()
 677 else()
 678   # for non-cross-compile, find_host_program == find_program and CUDA_TOOLKIT_TARGET_DIR == CUDA_TOOLKIT_ROOT_DIR
 679   macro( cuda_find_host_program )
 680     find_program( ${ARGN} )
 681   endmacro()
 682   SET (CUDA_TOOLKIT_TARGET_DIR ${CUDA_TOOLKIT_ROOT_DIR})
 683 endif()
 684
 685
# CUDA_NVCC_EXECUTABLE
# First pass: look for nvcc only under the toolkit root and the directories
# named by the CUDA_PATH / CUDA_BIN_PATH environment variables (each also
# tried with a bin or bin64 suffix); NO_DEFAULT_PATH excludes everything else.
cuda_find_host_program(CUDA_NVCC_EXECUTABLE
  NAMES nvcc
  PATHS "${CUDA_TOOLKIT_ROOT_DIR}"
  ENV CUDA_PATH
  ENV CUDA_BIN_PATH
  PATH_SUFFIXES bin bin64
  NO_DEFAULT_PATH
  )
# Search default search paths, after we search our own set of paths.
cuda_find_host_program(CUDA_NVCC_EXECUTABLE nvcc)
mark_as_advanced(CUDA_NVCC_EXECUTABLE)
 698
 699 if(CUDA_NVCC_EXECUTABLE AND NOT CUDA_VERSION)
 700   # Compute the version.
 701   execute_process (COMMAND ${CUDA_NVCC_EXECUTABLE} "--version" OUTPUT_VARIABLE NVCC_OUT)
 702   string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR ${NVCC_OUT})
 703   string(REGEX REPLACE ".*release ([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR ${NVCC_OUT})
 704   set(CUDA_VERSION "${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR}" CACHE STRING "Version of CUDA as computed from nvcc.")
 705   mark_as_advanced(CUDA_VERSION)
 706 else()
 707   # Need to set these based off of the cached value
 708   string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\1" CUDA_VERSION_MAJOR "${CUDA_VERSION}")
 709   string(REGEX REPLACE "([0-9]+)\\.([0-9]+).*" "\\2" CUDA_VERSION_MINOR "${CUDA_VERSION}")
 710 endif()
 711
 712
 713 # Always set this convenience variable
 714 set(CUDA_VERSION_STRING "${CUDA_VERSION}")
 715
 716 # CUDA_TOOLKIT_INCLUDE
 717 find_path(CUDA_TOOLKIT_INCLUDE
 718   device_functions.h # Header included in toolkit
 719   PATHS ${CUDA_TOOLKIT_TARGET_DIR}
 720   ENV CUDA_PATH
 721   ENV CUDA_INC_PATH
 722   PATH_SUFFIXES include
 723   NO_DEFAULT_PATH
 724   )
 725 # Search default search paths, after we search our own set of paths.
 726 find_path(CUDA_TOOLKIT_INCLUDE device_functions.h)
 727 mark_as_advanced(CUDA_TOOLKIT_INCLUDE)
 728
 729 if (CUDA_VERSION VERSION_GREATER "7.0" OR EXISTS "${CUDA_TOOLKIT_INCLUDE}/cuda_fp16.h")
 730   set(CUDA_HAS_FP16 TRUE)
 731 else()
 732   set(CUDA_HAS_FP16 FALSE)
 733 endif()
 734
 735 # Set the user list of include dir to nothing to initialize it.
 736 set (CUDA_NVCC_INCLUDE_DIRS_USER "")
 737 set (CUDA_INCLUDE_DIRS ${CUDA_TOOLKIT_INCLUDE})
 738
# Locate a library, preferring the toolkit's own directories over the system.
#   _var      - name of the result variable handed to find_library()
#   _names    - library name(s) to look for
#   _doc      - documentation string for the result variable
#   _path_ext - prefix prepended to every library sub-directory suffix
#               (e.g. "extras/CUPTI/"); pass "" for the plain lib directories
macro(cuda_find_library_local_first_with_path_ext _var _names _doc _path_ext )
  if(CMAKE_SIZEOF_VOID_P EQUAL 8)
    # CUDA 3.2+ on Windows moved the library directories, so we need the new
    # and old paths.
    set(_cuda_64bit_lib_dir "${_path_ext}lib/x64" "${_path_ext}lib64" "${_path_ext}libx64" )
  endif()
  # CUDA 3.2+ on Windows moved the library directories, so we need the new
  # (lib/Win32) and the old path (lib).
  # First pass: only the toolkit target directory and the CUDA environment
  # variables, with the 64-bit suffixes (when set above) tried first.
  find_library(${_var}
    NAMES ${_names}
    PATHS "${CUDA_TOOLKIT_TARGET_DIR}"
    ENV CUDA_PATH
    ENV CUDA_LIB_PATH
    PATH_SUFFIXES ${_cuda_64bit_lib_dir} "${_path_ext}lib/Win32" "${_path_ext}lib" "${_path_ext}libWin32"
    DOC ${_doc}
    NO_DEFAULT_PATH
    )
  if (NOT CMAKE_CROSSCOMPILING)
    # Search default search paths, after we search our own set of paths.
    find_library(${_var}
      NAMES ${_names}
      PATHS "/usr/lib/nvidia-current"
      DOC ${_doc}
      )
  endif()
endmacro()
 765
# Same lookup as cuda_find_library_local_first_with_path_ext() with an empty
# path prefix, i.e. only the plain library sub-directories are searched.
#   _var   - name of the result variable
#   _names - library name(s) to look for
#   _doc   - documentation string for the result variable
macro(cuda_find_library_local_first _var _names _doc)
  cuda_find_library_local_first_with_path_ext( "${_var}" "${_names}" "${_doc}" "" )
endmacro()
 769
 770 macro(find_library_local_first _var _names _doc )
 771   cuda_find_library_local_first( "${_var}" "${_names}" "${_doc}" "" )
 772 endmacro()
 773
 774
# CUDA_LIBRARIES
# Locate the runtime library variants.  The dynamic cudart is always searched
# for; the other variants are only looked up for the toolkit versions named in
# the conditions below.
cuda_find_library_local_first(CUDA_CUDART_LIBRARY cudart "\"cudart\" library")
if(CUDA_VERSION VERSION_EQUAL "3.0")
  # The cudartemu library only existed for the 3.0 version of CUDA.
  cuda_find_library_local_first(CUDA_CUDARTEMU_LIBRARY cudartemu "\"cudartemu\" library")
  mark_as_advanced(
    CUDA_CUDARTEMU_LIBRARY
    )
endif()

# The static runtime is only searched for with toolkit 5.5 or newer.
if(NOT CUDA_VERSION VERSION_LESS "5.5")
  cuda_find_library_local_first(CUDA_cudart_static_LIBRARY cudart_static "static CUDA runtime library")
  mark_as_advanced(CUDA_cudart_static_LIBRARY)
endif()


# CUDA_CUDART_LIBRARY_VAR holds the NAME of the variable that must be set for
# the package to count as found (it is passed to the REQUIRED_VARS check).
if(CUDA_cudart_static_LIBRARY)
  # If static cudart available, use it by default, but provide a user-visible option to disable it.
  option(CUDA_USE_STATIC_CUDA_RUNTIME "Use the static version of the CUDA runtime library if available" ON)
  set(CUDA_CUDART_LIBRARY_VAR CUDA_cudart_static_LIBRARY)
else()
  # If not available, silently disable the option.
  set(CUDA_USE_STATIC_CUDA_RUNTIME OFF CACHE INTERNAL "")
  set(CUDA_CUDART_LIBRARY_VAR CUDA_CUDART_LIBRARY)
endif()
# The device runtime library is only searched for with toolkit 5.0 or newer.
if(NOT CUDA_VERSION VERSION_LESS "5.0")
  cuda_find_library_local_first(CUDA_cudadevrt_LIBRARY cudadevrt "\"cudadevrt\" library")
  mark_as_advanced(CUDA_cudadevrt_LIBRARY)
endif()
 804
 805 if(CUDA_USE_STATIC_CUDA_RUNTIME)
 806   if(UNIX)
 807     # Check for the dependent libraries.  Here we look for pthreads.
 808     if (DEFINED CMAKE_THREAD_PREFER_PTHREAD)
 809       set(_cuda_cmake_thread_prefer_pthread ${CMAKE_THREAD_PREFER_PTHREAD})
 810     endif()
 811     set(CMAKE_THREAD_PREFER_PTHREAD 1)
 812
 813     # Many of the FindXYZ CMake comes with makes use of try_compile with int main(){return 0;}
 814     # as the source file.  Unfortunately this causes a warning with -Wstrict-prototypes and
 815     # -Werror causes the try_compile to fail.  We will just temporarily disable other flags
 816     # when doing the find_package command here.
 817     set(_cuda_cmake_c_flags ${CMAKE_C_FLAGS})
 818     set(CMAKE_C_FLAGS "-fPIC")
 819     find_package(Threads REQUIRED)
 820     set(CMAKE_C_FLAGS ${_cuda_cmake_c_flags})
 821
 822     if (DEFINED _cuda_cmake_thread_prefer_pthread)
 823       set(CMAKE_THREAD_PREFER_PTHREAD ${_cuda_cmake_thread_prefer_pthread})
 824       unset(_cuda_cmake_thread_prefer_pthread)
 825     else()
 826       unset(CMAKE_THREAD_PREFER_PTHREAD)
 827     endif()
 828
 829     if(NOT APPLE)
 830       #On Linux, you must link against librt when using the static cuda runtime.
 831       find_library(CUDA_rt_LIBRARY rt)
 832       if (NOT CUDA_rt_LIBRARY)
 833         message(WARNING "Expecting to find librt for libcudart_static, but didn't find it.")
 834       endif()
 835     endif()
 836   endif()
 837 endif()
 838
# CUPTI library showed up in cuda toolkit 4.0
# It is searched for under the "extras/CUPTI/" prefix of the toolkit's library
# sub-directories.
if(NOT CUDA_VERSION VERSION_LESS "4.0")
  cuda_find_library_local_first_with_path_ext(CUDA_cupti_LIBRARY cupti "\"cupti\" library" "extras/CUPTI/")
  mark_as_advanced(CUDA_cupti_LIBRARY)
endif()
 844
 845 # Set the CUDA_LIBRARIES variable.  This is the set of stuff to link against if you are
 846 # using the CUDA runtime.  For the dynamic version of the runtime, most of the
 847 # dependencies are brough in, but for the static version there are additional libraries
 848 # and linker commands needed.
 849 # Initialize to empty
 850 set(CUDA_LIBRARIES)
 851
 852 # If we are using emulation mode and we found the cudartemu library then use
 853 # that one instead of cudart.
 854 if(CUDA_BUILD_EMULATION AND CUDA_CUDARTEMU_LIBRARY)
 855   list(APPEND CUDA_LIBRARIES ${CUDA_CUDARTEMU_LIBRARY})
 856 elseif(CUDA_USE_STATIC_CUDA_RUNTIME AND CUDA_cudart_static_LIBRARY)
 857   list(APPEND CUDA_LIBRARIES ${CUDA_cudart_static_LIBRARY} ${CMAKE_THREAD_LIBS_INIT} ${CMAKE_DL_LIBS})
 858   if (CUDA_rt_LIBRARY)
 859     list(APPEND CUDA_LIBRARIES ${CUDA_rt_LIBRARY})
 860   endif()
 861   if(APPLE)
 862     # We need to add the default path to the driver (libcuda.dylib) as an rpath, so that
 863     # the static cuda runtime can find it at runtime.
 864     list(APPEND CUDA_LIBRARIES -Wl,-rpath,/usr/local/cuda/lib)
 865   endif()
 866 else()
 867   list(APPEND CUDA_LIBRARIES ${CUDA_CUDART_LIBRARY})
 868 endif()
 869
# Locate the "cuda" library itself.
# 1.1 toolkit on linux doesn't appear to have a separate library on
# some platforms.
cuda_find_library_local_first(CUDA_CUDA_LIBRARY cuda "\"cuda\" library (older versions only).")

# Hide the two core library cache entries from the default cache view.
mark_as_advanced(
  CUDA_CUDA_LIBRARY
  CUDA_CUDART_LIBRARY
  )
 878
#######################
# Look for some of the toolkit helper libraries
#
# FIND_CUDA_HELPER_LIBS(name) searches for the library called name with the
# toolkit-first lookup and stores the result in the advanced cache variable
# CUDA_${name}_LIBRARY.
macro(FIND_CUDA_HELPER_LIBS _name)
  cuda_find_library_local_first(CUDA_${_name}_LIBRARY ${_name} "\"${_name}\" library")
  mark_as_advanced(CUDA_${_name}_LIBRARY)
endmacro()
 885
 886 #######################
 887 # Disable emulation for v3.1 onward
 888 if(CUDA_VERSION VERSION_GREATER "3.0")
 889   if(CUDA_BUILD_EMULATION)
 890     message(FATAL_ERROR "CUDA_BUILD_EMULATION is not supported in version 3.1 and onwards.  You must disable it to proceed.  You have version ${CUDA_VERSION}.")
 891   endif()
 892 endif()
 893
 894 # Search for additional CUDA toolkit libraries.
 895 if(CUDA_VERSION VERSION_LESS "3.1")
 896   # Emulation libraries aren't available in version 3.1 onward.
 897   find_cuda_helper_libs(cufftemu)
 898   find_cuda_helper_libs(cublasemu)
 899 endif()
 900 find_cuda_helper_libs(cufft)
 901 find_cuda_helper_libs(cublas)
 902 if(NOT CUDA_VERSION VERSION_LESS "3.2")
 903   # cusparse showed up in version 3.2
 904   find_cuda_helper_libs(cusparse)
 905   find_cuda_helper_libs(curand)
 906   if (WIN32)
 907     find_cuda_helper_libs(nvcuvenc)
 908     find_cuda_helper_libs(nvcuvid)
 909   endif()
 910 endif()
 911 if(CUDA_VERSION VERSION_GREATER "5.0")
 912   find_cuda_helper_libs(cublas_device)
 913   # In CUDA 5.5 NPP was splitted onto 3 separate libraries.
 914   find_cuda_helper_libs(nppc)
 915   find_cuda_helper_libs(nppi)
 916   find_cuda_helper_libs(npps)
 917   set(CUDA_npp_LIBRARY "${CUDA_nppc_LIBRARY};${CUDA_nppi_LIBRARY};${CUDA_npps_LIBRARY}")
 918 elseif(NOT CUDA_VERSION VERSION_LESS "4.0")
 919   find_cuda_helper_libs(npp)
 920 endif()
 921 if(NOT CUDA_VERSION VERSION_LESS "7.0")
 922   # cusolver showed up in version 7.0
 923   find_cuda_helper_libs(cusolver)
 924 endif()
 925
 926 if (CUDA_BUILD_EMULATION)
 927   set(CUDA_CUFFT_LIBRARIES ${CUDA_cufftemu_LIBRARY})
 928   set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublasemu_LIBRARY})
 929 else()
 930   set(CUDA_CUFFT_LIBRARIES ${CUDA_cufft_LIBRARY})
 931   set(CUDA_CUBLAS_LIBRARIES ${CUDA_cublas_LIBRARY} ${CUDA_cublas_device_LIBRARY})
 932 endif()
 933
########################
# Look for the SDK stuff.  As of CUDA 3.0 NVSDKCUDA_ROOT has been replaced with
# NVSDKCOMPUTE_ROOT with the old CUDA C contents moved into the C subdirectory
#
# The SDK root is identified by the presence of common/inc/cutil.h.  Hints come
# from the two environment variables and a Windows registry value; a fixed
# /Developer location is tried as an additional path.
find_path(CUDA_SDK_ROOT_DIR common/inc/cutil.h
 HINTS
  "$ENV{NVSDKCOMPUTE_ROOT}/C"
  ENV NVSDKCUDA_ROOT
  "[HKEY_LOCAL_MACHINE\\SOFTWARE\\NVIDIA Corporation\\Installed Products\\NVIDIA SDK 10\\Compute;InstallDir]"
 PATHS
  "/Developer/GPU\ Computing/C"
  )

# Keep the CUDA_SDK_ROOT_DIR first in order to be able to override the
# environment variables.
# CUDA_SDK_SEARCH_PATH is the list of candidate SDK directories; the
# commented-out examples that follow show how it is meant to be used.
set(CUDA_SDK_SEARCH_PATH
  "${CUDA_SDK_ROOT_DIR}"
  "${CUDA_TOOLKIT_ROOT_DIR}/local/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NVSDK0.2"
  "${CUDA_TOOLKIT_ROOT_DIR}/NV_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK"
  "$ENV{HOME}/NVIDIA_CUDA_SDK_MACOSX"
  "/Developer/CUDA"
  )
 957
 958 # Example of how to find an include file from the CUDA_SDK_ROOT_DIR
 959
 960 # find_path(CUDA_CUT_INCLUDE_DIR
 961 #   cutil.h
 962 #   PATHS ${CUDA_SDK_SEARCH_PATH}
 963 #   PATH_SUFFIXES "common/inc"
 964 #   DOC "Location of cutil.h"
 965 #   NO_DEFAULT_PATH
 966 #   )
 967 # # Now search system paths
 968 # find_path(CUDA_CUT_INCLUDE_DIR cutil.h DOC "Location of cutil.h")
 969
 970 # mark_as_advanced(CUDA_CUT_INCLUDE_DIR)
 971
 972
 973 # Example of how to find a library in the CUDA_SDK_ROOT_DIR
 974
 975 # # cutil library is called cutil64 for 64 bit builds on windows.  We don't want
 976 # # to get these confused, so we are setting the name based on the word size of
 977 # # the build.
 978
 979 # if(CMAKE_SIZEOF_VOID_P EQUAL 8)
 980 #   set(cuda_cutil_name cutil64)
 981 # else()
 982 #   set(cuda_cutil_name cutil32)
 983 # endif()
 984
 985 # find_library(CUDA_CUT_LIBRARY
 986 #   NAMES cutil ${cuda_cutil_name}
 987 #   PATHS ${CUDA_SDK_SEARCH_PATH}
 988 #   # The new version of the sdk shows up in common/lib, but the old one is in lib
 989 #   PATH_SUFFIXES "common/lib" "lib"
 990 #   DOC "Location of cutil library"
 991 #   NO_DEFAULT_PATH
 992 #   )
 993 # # Now search system paths
 994 # find_library(CUDA_CUT_LIBRARY NAMES cutil ${cuda_cutil_name} DOC "Location of cutil library")
 995 # mark_as_advanced(CUDA_CUT_LIBRARY)
 996 # set(CUDA_CUT_LIBRARIES ${CUDA_CUT_LIBRARY})
 997
 998
 999
#############################
# Check for required components
# Start out as found; find_package_handle_standard_args() below performs the
# actual check of the required variables and the version.
set(CUDA_FOUND TRUE)

# Record the directories used for this run.  The change-detection code near
# the top of the search section compares the toolkit root and target values
# against the current ones to decide whether cached results must be dropped.
set(CUDA_TOOLKIT_ROOT_DIR_INTERNAL "${CUDA_TOOLKIT_ROOT_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_TOOLKIT_ROOT_DIR was set successfully." FORCE)
set(CUDA_TOOLKIT_TARGET_DIR_INTERNAL "${CUDA_TOOLKIT_TARGET_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_TOOLKIT_TARGET_DIR was set successfully." FORCE)
set(CUDA_SDK_ROOT_DIR_INTERNAL "${CUDA_SDK_ROOT_DIR}" CACHE INTERNAL
  "This is the value of the last time CUDA_SDK_ROOT_DIR was set successfully." FORCE)

# NOTE(review): this expects FindPackageHandleStandardArgs.cmake to sit next to
# this file - confirm the copy is shipped in the same directory.
include(${CMAKE_CURRENT_LIST_DIR}/FindPackageHandleStandardArgs.cmake)

# The last required variable is whichever runtime library variable
# CUDA_CUDART_LIBRARY_VAR names (static or dynamic cudart).
find_package_handle_standard_args(CUDA
  REQUIRED_VARS
    CUDA_TOOLKIT_ROOT_DIR
    CUDA_NVCC_EXECUTABLE
    CUDA_INCLUDE_DIRS
    ${CUDA_CUDART_LIBRARY_VAR}
  VERSION_VAR
    CUDA_VERSION
  )
1022
1023
1024
1025 ###############################################################################
1026 ###############################################################################
1027 # Macros
1028 ###############################################################################
1029 ###############################################################################
1030
###############################################################################
# Add include directories to pass to the nvcc command.
#
# Every argument is appended, in order, to CUDA_NVCC_INCLUDE_DIRS_USER in the
# caller's scope.
macro(CUDA_INCLUDE_DIRECTORIES)
  list(APPEND CUDA_NVCC_INCLUDE_DIRS_USER ${ARGN})
endmacro()
1038
1039
##############################################################################
# Locate the helper scripts parse_cubin.cmake, make2cmake.cmake and
# run_nvcc.cmake, then pull in the compute-architecture selection helpers.
# NOTE(review): cuda_find_helper_file() is defined outside this part of the
# file; presumably it records each script's location - confirm there.
cuda_find_helper_file(parse_cubin cmake)
cuda_find_helper_file(make2cmake cmake)
cuda_find_helper_file(run_nvcc cmake)
include("${CMAKE_CURRENT_LIST_DIR}/FindCUDA/select_compute_arch.cmake")
1045
##############################################################################
# Separate the OPTIONS out from the sources
#
#   _sources       - output: every argument before OPTIONS that is not a
#                    target keyword (assumed to be a file)
#   _cmake_options - output: target keywords (WIN32, MACOSX_BUNDLE,
#                    EXCLUDE_FROM_ALL, STATIC, SHARED, MODULE) found anywhere
#   _options       - output: every non-keyword argument after OPTIONS
#   ARGN           - the arguments to classify
macro(CUDA_GET_SOURCES_AND_OPTIONS _sources _cmake_options _options)
  set( ${_sources} )
  set( ${_cmake_options} )
  set( ${_options} )
  set( _found_options FALSE )
  set( _cuda_target_keywords WIN32 MACOSX_BUNDLE EXCLUDE_FROM_ALL STATIC SHARED MODULE )
  foreach(arg ${ARGN})
    if("x${arg}" STREQUAL "xOPTIONS")
      # Everything that follows belongs to the nvcc options.
      set( _found_options TRUE )
    else()
      list(FIND _cuda_target_keywords "${arg}" _cuda_keyword_index)
      if(NOT _cuda_keyword_index EQUAL -1)
        list(APPEND ${_cmake_options} ${arg})
      elseif( _found_options )
        list(APPEND ${_options} ${arg})
      else()
        # Assume this is a file
        list(APPEND ${_sources} ${arg})
      endif()
    endif()
  endforeach()
  # This is a macro: do not leave the scratch variables in the caller's scope.
  unset( _cuda_target_keywords )
  unset( _cuda_keyword_index )
endmacro()
1076
##############################################################################
# Parse the OPTIONS from ARGN and set the variables prefixed by _option_prefix
#
# Arguments are appended to the list named ${_option_prefix}.  Once an argument
# equal to the upper-cased name of one of CUDA_configuration_types is seen
# (e.g. DEBUG), the following arguments are appended to
# ${_option_prefix}_DEBUG instead, until another configuration name appears.
# The configuration names themselves are not stored.
macro(CUDA_PARSE_NVCC_OPTIONS _option_prefix)
  set( _found_config )
  foreach(arg ${ARGN})
    # Determine if we are dealing with a per-configuration flag
    set( _cuda_arg_is_config FALSE )
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER "${config}" config_upper)
      if ("x${arg}" STREQUAL "x${config_upper}")
        set( _found_config _${arg})
        set( _cuda_arg_is_config TRUE )
      endif()
    endforeach()

    # Use an explicit marker instead of testing the truth value of the
    # argument: option values such as "0", "OFF" or "NO" are false constants
    # and would otherwise be dropped silently.
    if ( NOT _cuda_arg_is_config )
      list(APPEND ${_option_prefix}${_found_config} "${arg}")
    endif()
  endforeach()
  # This is a macro: do not leave the marker in the caller's scope.
  unset( _cuda_arg_is_config )
endmacro()
1098
##############################################################################
# Helper to add the include directory for CUDA only once
#
# Adds CUDA_INCLUDE_DIRS to the directory's include path unless one of the
# entries already present is equal to it.
function(CUDA_ADD_CUDA_INCLUDE_ONCE)
  get_directory_property(_current_dirs INCLUDE_DIRECTORIES)
  set(_already_present FALSE)
  if(_current_dirs)
    foreach(_entry ${_current_dirs})
      if("${_entry}" STREQUAL "${CUDA_INCLUDE_DIRS}")
        set(_already_present TRUE)
        # One hit is enough; the remaining entries cannot change the answer.
        break()
      endif()
    endforeach()
  endif()
  if(NOT _already_present)
    include_directories(${CUDA_INCLUDE_DIRS})
  endif()
endfunction()
1115
# Decide which library type keyword must be added to a library's arguments.
#   shared_flag - name of the output variable in the caller's scope
#   ARGN        - the arguments that will be handed to add_library()
# When ARGN already contains SHARED, MODULE or STATIC the output is empty;
# otherwise it is SHARED or STATIC according to BUILD_SHARED_LIBS.
function(CUDA_BUILD_SHARED_LIBRARY shared_flag)
  set(_library_args ${ARGN})
  set(_library_type)

  # Look for an explicit library type among the arguments.
  set(_type_is_explicit FALSE)
  foreach(_type_keyword SHARED MODULE STATIC)
    list(FIND _library_args "${_type_keyword}" _type_keyword_index)
    if(_type_keyword_index GREATER -1)
      set(_type_is_explicit TRUE)
    endif()
  endforeach()

  # Without an explicit type, follow BUILD_SHARED_LIBS.
  if(NOT _type_is_explicit)
    if(BUILD_SHARED_LIBS)
      set(_library_type SHARED)
    else()
      set(_library_type STATIC)
    endif()
  endif()

  set(${shared_flag} ${_library_type} PARENT_SCOPE)
endfunction()
1136
##############################################################################
# Helper to avoid clashes of files with the same basename but different paths.
# CMake itself only adds the path when a conflict is detected; that cannot be
# copied here, because by the time a second file with the same name shows up
# it is too late to fix the first one.  For consistency the relative path is
# therefore added to all files.
#
#   path       - source file path, absolute or relative
#   build_path - name of the output variable; receives the sanitized directory
#                part of the path (empty when the file has no directory part)
function(CUDA_COMPUTE_BUILD_PATH path build_path)
  # Only deal with CMake style paths from here on out
  file(TO_CMAKE_PATH "${path}" _cuda_rel_path)

  if(IS_ABSOLUTE "${_cuda_rel_path}")
    # Absolute paths are generally unnecessary, especially if something like
    # file(GLOB_RECURSE) is used to pick up the files.  Make the path relative
    # to the binary directory when it starts with it, and to the source
    # directory otherwise.
    string(FIND "${_cuda_rel_path}" "${CMAKE_CURRENT_BINARY_DIR}" _cuda_binary_dir_index)
    if(_cuda_binary_dir_index EQUAL 0)
      set(_cuda_base_dir "${CMAKE_CURRENT_BINARY_DIR}")
    else()
      set(_cuda_base_dir "${CMAKE_CURRENT_SOURCE_DIR}")
    endif()
    file(RELATIVE_PATH _cuda_rel_path "${_cuda_base_dir}" "${_cuda_rel_path}")
  endif()

  # This recipe is from cmLocalGenerator::CreateSafeUniqueObjectFileName in the
  # CMake source.
  #   1. remove leading slashes
  #   2. avoid absolute paths by replacing ':'
  #   3. avoid relative paths that go up the tree
  #   4. avoid spaces
  string(REGEX REPLACE "^[/]+" "" _cuda_rel_path "${_cuda_rel_path}")
  string(REPLACE ":" "_" _cuda_rel_path "${_cuda_rel_path}")
  string(REPLACE "../" "__/" _cuda_rel_path "${_cuda_rel_path}")
  string(REPLACE " " "_" _cuda_rel_path "${_cuda_rel_path}")

  # Strip off the filename last: removing the basename earlier could turn a
  # path that looked like path/../basename into path/.. (no trailing slash),
  # which the "../" replacement above would no longer catch.
  get_filename_component(_cuda_rel_path "${_cuda_rel_path}" PATH)

  set(${build_path} "${_cuda_rel_path}" PARENT_SCOPE)
endfunction()
1179
1180 ##############################################################################
# This helper macro populates the following variables and sets up custom
# commands and targets to invoke the nvcc compiler to generate C or PTX source
# dependent upon the format parameter.  The compiler is invoked once with -M
# to generate a dependency file and a second time with -cuda or -ptx to generate
# a .cpp or .ptx file.
1186 # INPUT:
1187 #   cuda_target         - Target name
1188 #   format              - PTX, CUBIN, FATBIN or OBJ
1189 #   FILE1 .. FILEN      - The remaining arguments are the sources to be wrapped.
1190 #   OPTIONS             - Extra options to NVCC
1191 # OUTPUT:
1192 #   generated_files     - List of generated files
1193 ##############################################################################
1194 ##############################################################################
1195
1196 macro(CUDA_WRAP_SRCS cuda_target format generated_files)
1197
1198   # Put optional arguments in list.
1199   set(_argn_list "${ARGN}")
1200   # If one of the given optional arguments is "PHONY", make a note of it, then
1201   # remove it from the list.
1202   list(FIND _argn_list "PHONY" _phony_idx)
1203   if("${_phony_idx}" GREATER "-1")
1204     set(_target_is_phony true)
1205     list(REMOVE_AT _argn_list ${_phony_idx})
1206   else()
1207     set(_target_is_phony false)
1208   endif()
1209
1210   # If CMake doesn't support separable compilation, complain
1211   if(CUDA_SEPARABLE_COMPILATION AND CMAKE_VERSION VERSION_LESS "2.8.10.1")
1212     message(SEND_ERROR "CUDA_SEPARABLE_COMPILATION isn't supported for CMake versions less than 2.8.10.1")
1213   endif()
1214
1215   # Set up all the command line flags here, so that they can be overridden on a per target basis.
1216
1217   set(nvcc_flags "")
1218
1219   # Emulation if the card isn't present.
1220   if (CUDA_BUILD_EMULATION)
1221     # Emulation.
1222     set(nvcc_flags ${nvcc_flags} --device-emulation -D_DEVICEEMU -g)
1223   else()
1224     # Device mode.  No flags necessary.
1225   endif()
1226
1227   if(CUDA_HOST_COMPILATION_CPP)
1228     set(CUDA_C_OR_CXX CXX)
1229   else()
1230     if(CUDA_VERSION VERSION_LESS "3.0")
1231       set(nvcc_flags ${nvcc_flags} --host-compilation C)
1232     else()
1233       message(WARNING "--host-compilation flag is deprecated in CUDA version >= 3.0.  Removing --host-compilation C flag" )
1234     endif()
1235     set(CUDA_C_OR_CXX C)
1236   endif()
1237
1238   set(generated_extension ${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION})
1239
1240   if(CUDA_64_BIT_DEVICE_CODE)
1241     set(nvcc_flags ${nvcc_flags} -m64)
1242   else()
1243     set(nvcc_flags ${nvcc_flags} -m32)
1244   endif()
1245
1246   if(CUDA_TARGET_CPU_ARCH)
1247     set(nvcc_flags ${nvcc_flags} "--target-cpu-architecture=${CUDA_TARGET_CPU_ARCH}")
1248   endif()
1249
1250   # This needs to be passed in at this stage, because VS needs to fill out the
1251   # value of VCInstallDir from within VS.  Note that CCBIN is only used if
1252   # -ccbin or --compiler-bindir isn't used and CUDA_HOST_COMPILER matches
1253   # $(VCInstallDir)/bin.
1254   if(CMAKE_GENERATOR MATCHES "Visual Studio")
1255     set(ccbin_flags -D "\"CCBIN:PATH=$(VCInstallDir)bin\"" )
1256   else()
1257     set(ccbin_flags)
1258   endif()
1259
1260   # Figure out which configure we will use and pass that in as an argument to
1261   # the script.  We need to defer the decision until compilation time, because
1262   # for VS projects we won't know if we are making a debug or release build
1263   # until build time.
1264   if(CMAKE_GENERATOR MATCHES "Visual Studio")
1265     set( CUDA_build_configuration "$(ConfigurationName)" )
1266   else()
1267     set( CUDA_build_configuration "${CMAKE_BUILD_TYPE}")
1268   endif()
1269
1270   # Initialize our list of includes with the user ones followed by the CUDA system ones.
1271   set(CUDA_NVCC_INCLUDE_DIRS ${CUDA_NVCC_INCLUDE_DIRS_USER} "${CUDA_INCLUDE_DIRS}")
1272   if(_target_is_phony)
1273     # If the passed in target name isn't a real target (i.e., this is from a call to one of the
1274     # cuda_compile_* functions), need to query directory properties to get include directories
1275     # and compile definitions.
1276     get_directory_property(_dir_include_dirs INCLUDE_DIRECTORIES)
1277     get_directory_property(_dir_compile_defs COMPILE_DEFINITIONS)
1278
1279     list(APPEND CUDA_NVCC_INCLUDE_DIRS "${_dir_include_dirs}")
1280     set(CUDA_NVCC_COMPILE_DEFINITIONS "${_dir_compile_defs}")
1281   else()
1282     # Append the include directories for this target via generator expression, which is
1283     # expanded by the FILE(GENERATE) call below.  This generator expression captures all
1284     # include dirs set by the user, whether via directory properties or target properties
1285     list(APPEND CUDA_NVCC_INCLUDE_DIRS "$<TARGET_PROPERTY:${cuda_target},INCLUDE_DIRECTORIES>")
1286
1287     # Do the same thing with compile definitions
1288     set(CUDA_NVCC_COMPILE_DEFINITIONS "$<TARGET_PROPERTY:${cuda_target},COMPILE_DEFINITIONS>")
1289   endif()
1290
1291
1292   # Reset these variables
1293   set(CUDA_WRAP_OPTION_NVCC_FLAGS)
1294   foreach(config ${CUDA_configuration_types})
1295     string(TOUPPER ${config} config_upper)
1296     set(CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper})
1297   endforeach()
1298
1299   CUDA_GET_SOURCES_AND_OPTIONS(_cuda_wrap_sources _cuda_wrap_cmake_options _cuda_wrap_options ${_argn_list})
1300   CUDA_PARSE_NVCC_OPTIONS(CUDA_WRAP_OPTION_NVCC_FLAGS ${_cuda_wrap_options})
1301
1302   # Figure out if we are building a shared library.  BUILD_SHARED_LIBS is
1303   # respected in CUDA_ADD_LIBRARY.
1304   set(_cuda_build_shared_libs FALSE)
1305   # SHARED, MODULE
1306   list(FIND _cuda_wrap_cmake_options SHARED _cuda_found_SHARED)
1307   list(FIND _cuda_wrap_cmake_options MODULE _cuda_found_MODULE)
1308   if(_cuda_found_SHARED GREATER -1 OR _cuda_found_MODULE GREATER -1)
1309     set(_cuda_build_shared_libs TRUE)
1310   endif()
1311   # STATIC
1312   list(FIND _cuda_wrap_cmake_options STATIC _cuda_found_STATIC)
1313   if(_cuda_found_STATIC GREATER -1)
1314     set(_cuda_build_shared_libs FALSE)
1315   endif()
1316
1317   # CUDA_HOST_FLAGS
1318   if(_cuda_build_shared_libs)
1319     # If we are setting up code for a shared library, then we need to add extra flags for
1320     # compiling objects for shared libraries.
1321     set(CUDA_HOST_SHARED_FLAGS ${CMAKE_SHARED_LIBRARY_${CUDA_C_OR_CXX}_FLAGS})
1322   else()
1323     set(CUDA_HOST_SHARED_FLAGS)
1324   endif()
1325   # Only add the CMAKE_{C,CXX}_FLAGS if we are propagating host flags.  We
1326   # always need to set the SHARED_FLAGS, though.
1327   if(CUDA_PROPAGATE_HOST_FLAGS)
1328     set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CMAKE_${CUDA_C_OR_CXX}_FLAGS} ${CUDA_HOST_SHARED_FLAGS})")
1329   else()
1330     set(_cuda_host_flags "set(CMAKE_HOST_FLAGS ${CUDA_HOST_SHARED_FLAGS})")
1331   endif()
1332
1333   set(_cuda_nvcc_flags_config "# Build specific configuration flags")
1334   # Loop over all the configuration types to generate appropriate flags for run_nvcc.cmake
1335   foreach(config ${CUDA_configuration_types})
1336     string(TOUPPER ${config} config_upper)
1337     # CMAKE_FLAGS are strings and not lists.  By not putting quotes around CMAKE_FLAGS
1338     # we convert the strings to lists (like we want).
1339
1340     if(CUDA_PROPAGATE_HOST_FLAGS)
1341       # nvcc chokes on -g3 in versions previous to 3.0, so replace it with -g
1342       set(_cuda_fix_g3 FALSE)
1343
1344       if(CMAKE_COMPILER_IS_GNUCC)
1345         if (CUDA_VERSION VERSION_LESS  "3.0" OR
1346             CUDA_VERSION VERSION_EQUAL "4.1" OR
1347             CUDA_VERSION VERSION_EQUAL "4.2"
1348             )
1349           set(_cuda_fix_g3 TRUE)
1350         endif()
1351       endif()
1352       if(_cuda_fix_g3)
1353         string(REPLACE "-g3" "-g" _cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
1354       else()
1355         set(_cuda_C_FLAGS "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
1356       endif()
1357
1358       string(APPEND _cuda_host_flags "\nset(CMAKE_HOST_FLAGS_${config_upper} ${_cuda_C_FLAGS})")
1359     endif()
1360
1361     # Note that if we ever want CUDA_NVCC_FLAGS_<CONFIG> to be string (instead of a list
1362     # like it is currently), we can remove the quotes around the
1363     # ${CUDA_NVCC_FLAGS_${config_upper}} variable like the CMAKE_HOST_FLAGS_<CONFIG> variable.
1364     string(APPEND _cuda_nvcc_flags_config "\nset(CUDA_NVCC_FLAGS_${config_upper} ${CUDA_NVCC_FLAGS_${config_upper}} ;; ${CUDA_WRAP_OPTION_NVCC_FLAGS_${config_upper}})")
1365   endforeach()
1366
1367   # Process the C++11 flag.  If the host sets the flag, we need to add it to nvcc and
1368   # remove it from the host. This is because -Xcompile -std=c++ will choke nvcc (it uses
1369   # the C preprocessor).  In order to get this to work correctly, we need to use nvcc's
1370   # specific c++11 flag.
1371   if( "${_cuda_host_flags}" MATCHES "-std=c\\+\\+11")
1372     # Add the c++11 flag to nvcc if it isn't already present.  Note that we only look at
1373     # the main flag instead of the configuration specific flags.
1374     if( NOT "${CUDA_NVCC_FLAGS}" MATCHES "-std;c\\+\\+11" )
1375       list(APPEND nvcc_flags --std c++11)
1376     endif()
1377     string(REGEX REPLACE "[-]+std=c\\+\\+11" "" _cuda_host_flags "${_cuda_host_flags}")
1378   endif()
1379
1380   if(_cuda_build_shared_libs)
1381     list(APPEND nvcc_flags "-D${cuda_target}_EXPORTS")
1382   endif()
1383
1384   # Reset the output variable
1385   set(_cuda_wrap_generated_files "")
1386
1387   # Iterate over the macro arguments and create custom
1388   # commands for all the .cu files.
1389   foreach(file ${_argn_list})
1390     # Ignore any file marked as a HEADER_FILE_ONLY
1391     get_source_file_property(_is_header ${file} HEADER_FILE_ONLY)
1392     # Allow per source file overrides of the format.  Also allows compiling non-.cu files.
1393     get_source_file_property(_cuda_source_format ${file} CUDA_SOURCE_PROPERTY_FORMAT)
1394     if((${file} MATCHES "\\.cu$" OR _cuda_source_format) AND NOT _is_header)
1395
1396       if(NOT _cuda_source_format)
1397         set(_cuda_source_format ${format})
1398       endif()
1399       # If file isn't a .cu file, we need to tell nvcc to treat it as such.
1400       if(NOT ${file} MATCHES "\\.cu$")
1401         set(cuda_language_flag -x=cu)
1402       else()
1403         set(cuda_language_flag)
1404       endif()
1405
1406       if( ${_cuda_source_format} MATCHES "OBJ")
1407         set( cuda_compile_to_external_module OFF )
1408       else()
1409         set( cuda_compile_to_external_module ON )
1410         if( ${_cuda_source_format} MATCHES "PTX" )
1411           set( cuda_compile_to_external_module_type "ptx" )
1412         elseif( ${_cuda_source_format} MATCHES "CUBIN")
1413           set( cuda_compile_to_external_module_type "cubin" )
1414         elseif( ${_cuda_source_format} MATCHES "FATBIN")
1415           set( cuda_compile_to_external_module_type "fatbin" )
1416         else()
1417           message( FATAL_ERROR "Invalid format flag passed to CUDA_WRAP_SRCS or set with CUDA_SOURCE_PROPERTY_FORMAT file property for file '${file}': '${_cuda_source_format}'.  Use OBJ, PTX, CUBIN or FATBIN.")
1418         endif()
1419       endif()
1420
1421       if(cuda_compile_to_external_module)
1422         # Don't use any of the host compilation flags for PTX targets.
1423         set(CUDA_HOST_FLAGS)
1424         set(CUDA_NVCC_FLAGS_CONFIG)
1425       else()
1426         set(CUDA_HOST_FLAGS ${_cuda_host_flags})
1427         set(CUDA_NVCC_FLAGS_CONFIG ${_cuda_nvcc_flags_config})
1428       endif()
1429
1430       # Determine output directory
1431       cuda_compute_build_path("${file}" cuda_build_path)
1432       set(cuda_compile_intermediate_directory "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${cuda_build_path}")
1433       if(CUDA_GENERATED_OUTPUT_DIR)
1434         set(cuda_compile_output_dir "${CUDA_GENERATED_OUTPUT_DIR}")
1435       else()
1436         if ( cuda_compile_to_external_module )
1437           set(cuda_compile_output_dir "${CMAKE_CURRENT_BINARY_DIR}")
1438         else()
1439           set(cuda_compile_output_dir "${cuda_compile_intermediate_directory}")
1440         endif()
1441       endif()
1442
1443       # Add a custom target to generate a c or ptx file. ######################
1444
1445       get_filename_component( basename ${file} NAME )
1446       if( cuda_compile_to_external_module )
1447         set(generated_file_path "${cuda_compile_output_dir}")
1448         set(generated_file_basename "${cuda_target}_generated_${basename}.${cuda_compile_to_external_module_type}")
1449         set(format_flag "-${cuda_compile_to_external_module_type}")
1450         file(MAKE_DIRECTORY "${cuda_compile_output_dir}")
1451       else()
1452         set(generated_file_path "${cuda_compile_output_dir}/${CMAKE_CFG_INTDIR}")
1453         set(generated_file_basename "${cuda_target}_generated_${basename}${generated_extension}")
1454         if(CUDA_SEPARABLE_COMPILATION)
1455           set(format_flag "-dc")
1456         else()
1457           set(format_flag "-c")
1458         endif()
1459       endif()
1460
1461       # Set all of our file names.  Make sure that whatever filenames that have
1462       # generated_file_path in them get passed in through as a command line
1463       # argument, so that the ${CMAKE_CFG_INTDIR} gets expanded at run time
1464       # instead of configure time.
1465       set(generated_file "${generated_file_path}/${generated_file_basename}")
1466       set(cmake_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.depend")
1467       set(NVCC_generated_dependency_file "${cuda_compile_intermediate_directory}/${generated_file_basename}.NVCC-depend")
1468       set(generated_cubin_file "${generated_file_path}/${generated_file_basename}.cubin.txt")
1469       set(custom_target_script_pregen "${cuda_compile_intermediate_directory}/${generated_file_basename}.cmake.pre-gen")
1470       set(custom_target_script "${cuda_compile_intermediate_directory}/${generated_file_basename}$<$<BOOL:$<CONFIG>>:.$<CONFIG>>.cmake")
1471
1472       # Setup properties for obj files:
1473       if( NOT cuda_compile_to_external_module )
1474         set_source_files_properties("${generated_file}"
1475           PROPERTIES
1476           EXTERNAL_OBJECT true # This is an object file not to be compiled, but only be linked.
1477           )
1478       endif()
1479
1480       # Don't add CMAKE_CURRENT_SOURCE_DIR if the path is already an absolute path.
1481       get_filename_component(file_path "${file}" PATH)
1482       if(IS_ABSOLUTE "${file_path}")
1483         set(source_file "${file}")
1484       else()
1485         set(source_file "${CMAKE_CURRENT_SOURCE_DIR}/${file}")
1486       endif()
1487
1488       if( NOT cuda_compile_to_external_module AND CUDA_SEPARABLE_COMPILATION)
1489         list(APPEND ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS "${generated_file}")
1490       endif()
1491
1492       # Bring in the dependencies.  Creates a variable CUDA_NVCC_DEPEND #######
1493       cuda_include_nvcc_dependencies(${cmake_dependency_file})
1494
1495       # Convience string for output ###########################################
1496       if(CUDA_BUILD_EMULATION)
1497         set(cuda_build_type "Emulation")
1498       else()
1499         set(cuda_build_type "Device")
1500       endif()
1501
1502       # Build the NVCC made dependency file ###################################
1503       set(build_cubin OFF)
1504       if ( NOT CUDA_BUILD_EMULATION AND CUDA_BUILD_CUBIN )
1505          if ( NOT cuda_compile_to_external_module )
1506            set ( build_cubin ON )
1507          endif()
1508       endif()
1509
1510       # Configure the build script
1511       configure_file("${CUDA_run_nvcc}" "${custom_target_script_pregen}" @ONLY)
1512       file(GENERATE
1513         OUTPUT "${custom_target_script}"
1514         INPUT "${custom_target_script_pregen}"
1515         )
1516
1517       # So if a user specifies the same cuda file as input more than once, you
1518       # can have bad things happen with dependencies.  Here we check an option
1519       # to see if this is the behavior they want.
1520       if(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE)
1521         set(main_dep MAIN_DEPENDENCY ${source_file})
1522       else()
1523         set(main_dep DEPENDS ${source_file})
1524       endif()
1525
1526       if(CUDA_VERBOSE_BUILD)
1527         set(verbose_output ON)
1528       elseif(CMAKE_GENERATOR MATCHES "Makefiles")
1529         set(verbose_output "$(VERBOSE)")
1530       else()
1531         set(verbose_output OFF)
1532       endif()
1533
1534       # Create up the comment string
1535       file(RELATIVE_PATH generated_file_relative_path "${CMAKE_BINARY_DIR}" "${generated_file}")
1536       if(cuda_compile_to_external_module)
1537         set(cuda_build_comment_string "Building NVCC ${cuda_compile_to_external_module_type} file ${generated_file_relative_path}")
1538       else()
1539         set(cuda_build_comment_string "Building NVCC (${cuda_build_type}) object ${generated_file_relative_path}")
1540       endif()
1541
1542       set(_verbatim VERBATIM)
1543       if(ccbin_flags MATCHES "\\$\\(VCInstallDir\\)")
1544         set(_verbatim "")
1545       endif()
1546
1547       # Build the generated file and dependency file ##########################
1548       add_custom_command(
1549         OUTPUT ${generated_file}
1550         # These output files depend on the source_file and the contents of cmake_dependency_file
1551         ${main_dep}
1552         DEPENDS ${CUDA_NVCC_DEPEND}
1553         DEPENDS ${custom_target_script}
1554         # Make sure the output directory exists before trying to write to it.
1555         COMMAND ${CMAKE_COMMAND} -E make_directory "${generated_file_path}"
1556         COMMAND ${CMAKE_COMMAND} ARGS
1557           -D verbose:BOOL=${verbose_output}
1558           ${ccbin_flags}
1559           -D build_configuration:STRING=${CUDA_build_configuration}
1560           -D "generated_file:STRING=${generated_file}"
1561           -D "generated_cubin_file:STRING=${generated_cubin_file}"
1562           -P "${custom_target_script}"
1563         WORKING_DIRECTORY "${cuda_compile_intermediate_directory}"
1564         COMMENT "${cuda_build_comment_string}"
1565         ${_verbatim}
1566         )
1567
1568       # Make sure the build system knows the file is generated.
1569       set_source_files_properties(${generated_file} PROPERTIES GENERATED TRUE)
1570
1571       list(APPEND _cuda_wrap_generated_files ${generated_file})
1572
1573       # Add the other files that we want cmake to clean on a cleanup ##########
1574       list(APPEND CUDA_ADDITIONAL_CLEAN_FILES "${cmake_dependency_file}")
1575       list(REMOVE_DUPLICATES CUDA_ADDITIONAL_CLEAN_FILES)
1576       set(CUDA_ADDITIONAL_CLEAN_FILES ${CUDA_ADDITIONAL_CLEAN_FILES} CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
1577
1578     endif()
1579   endforeach()
1580
1581   # Set the return parameter
1582   set(${generated_files} ${_cuda_wrap_generated_files})
1583 endmacro()
1584
# Collect the host compiler flags from a flag string that the device link step
# also has to see.
#
#   important_flags - name of a list variable; the flags found are appended to
#                     its current value and the result is stored in the
#                     caller's scope.
#   flag_string     - string of host compiler flags to scan.
#
# With a Visual Studio generator every match of "/M[DT][d]?" is collected,
# with any other generator every occurrence of "-fPIC".
function(_cuda_get_important_host_flags important_flags flag_string)
  # Select the pattern first so the scan itself is written only once.
  if(NOT CMAKE_GENERATOR MATCHES "Visual Studio")
    set(_cuda_important_regex "-fPIC")
  else()
    set(_cuda_important_regex "/M[DT][d]?")
  endif()

  string(REGEX MATCHALL "${_cuda_important_regex}" _cuda_matched_flags "${flag_string}")

  # Existing entries first, new matches after them.
  set(${important_flags} ${${important_flags}} ${_cuda_matched_flags} PARENT_SCOPE)
endfunction()
1595
1596 ###############################################################################
1597 ###############################################################################
1598 # Separable Compilation Link
1599 ###############################################################################
1600 ###############################################################################
1601
# Compute the file name of the intermediate link object consumed by
# CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS.
#
#   output_file_var - name of the variable that receives the result in the
#                     caller's scope.
#   cuda_target     - target name; used for the directory and the file stem.
#   object_files    - list of separable compilation objects.  When it is empty
#                     the result is an empty string.
function(CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME output_file_var cuda_target object_files)
  # Nothing to link => no intermediate link file.
  if(NOT object_files)
    set(${output_file_var} "" PARENT_SCOPE)
    return()
  endif()

  # The object lives in the target's CMakeFiles directory, below the per
  # configuration sub directory, and uses the host object file extension.
  set(_cuda_link_dir "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${cuda_target}.dir/${CMAKE_CFG_INTDIR}")
  set(_cuda_link_name "${cuda_target}_intermediate_link${CMAKE_${CUDA_C_OR_CXX}_OUTPUT_EXTENSION}")

  set(${output_file_var} "${_cuda_link_dir}/${_cuda_link_name}" PARENT_SCOPE)
endfunction()
1613
# Set up the build rule for the separable compilation intermediate link file.
#
#   output_file  - path of the intermediate link object written by
#                  "nvcc -dlink".
#   cuda_target  - target the object belongs to; the PRE_LINK fallback rule is
#                  attached to it.
#   options      - option list handed to CUDA_PARSE_NVCC_OPTIONS.
#   object_files - list of objects to device-link.  Nothing is set up when the
#                  list is empty.
function(CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS output_file cuda_target options object_files)
  if (object_files)

    set_source_files_properties("${output_file}"
      PROPERTIES
      EXTERNAL_OBJECT TRUE # This is an object file not to be compiled, but only
                           # be linked.
      GENERATED TRUE       # This file is generated during the build
      )

    # For now we are ignoring all the configuration specific flags.
    set(nvcc_flags)
    CUDA_PARSE_NVCC_OPTIONS(nvcc_flags ${options})
    if(CUDA_64_BIT_DEVICE_CODE)
      list(APPEND nvcc_flags -m64)
    else()
      list(APPEND nvcc_flags -m32)
    endif()
    # If -ccbin, --compiler-bindir has been specified, don't do anything.  Otherwise add it here.
    list( FIND nvcc_flags "-ccbin" ccbin_found0 )
    list( FIND nvcc_flags "--compiler-bindir" ccbin_found1 )
    if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER )
      # Match VERBATIM check below.
      if(CUDA_HOST_COMPILER MATCHES "\\$\\(VCInstallDir\\)")
        list(APPEND nvcc_flags -ccbin "\"${CUDA_HOST_COMPILER}\"")
      else()
        list(APPEND nvcc_flags -ccbin "${CUDA_HOST_COMPILER}")
      endif()
    endif()

    # Create a list of flags specified by CUDA_NVCC_FLAGS_${CONFIG} and CMAKE_${CUDA_C_OR_CXX}_FLAGS*
    set(config_specific_flags)
    set(flags)
    foreach(config ${CUDA_configuration_types})
      string(TOUPPER "${config}" config_upper)
      # Add config specific flags
      foreach(f ${CUDA_NVCC_FLAGS_${config_upper}})
        list(APPEND config_specific_flags $<$<CONFIG:${config}>:${f}>)
      endforeach()
      set(important_host_flags)
      _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS_${config_upper}}")
      foreach(f ${important_host_flags})
        list(APPEND flags $<$<CONFIG:${config}>:-Xcompiler> $<$<CONFIG:${config}>:${f}>)
      endforeach()
    endforeach()
    # Add CMAKE_${CUDA_C_OR_CXX}_FLAGS
    set(important_host_flags)
    _cuda_get_important_host_flags(important_host_flags "${CMAKE_${CUDA_C_OR_CXX}_FLAGS}")
    foreach(f ${important_host_flags})
      list(APPEND flags -Xcompiler ${f})
    endforeach()

    # Add our general CUDA_NVCC_FLAGS with the configuration specific flags
    set(nvcc_flags ${CUDA_NVCC_FLAGS} ${config_specific_flags} ${nvcc_flags})

    file(RELATIVE_PATH output_file_relative_path "${CMAKE_BINARY_DIR}" "${output_file}")

    # Both build rules below create this directory before running nvcc, so the
    # device link never fails on a missing output directory.
    get_filename_component(output_file_dir "${output_file}" DIRECTORY)

    # Some generators don't handle the multiple levels of custom command
    # dependencies correctly (obj1 depends on file1, obj2 depends on obj1), so
    # we work around that issue by compiling the intermediate link object as a
    # pre-link custom command in that situation.
    set(do_obj_build_rule TRUE)
    if (MSVC_VERSION GREATER 1599 AND MSVC_VERSION LESS 1800)
      # VS 2010 and 2012 have this problem.
      set(do_obj_build_rule FALSE)
    endif()

    # VERBATIM is dropped when the flags contain the $(VCInstallDir) build
    # variable reference.
    set(_verbatim VERBATIM)
    if(nvcc_flags MATCHES "\\$\\(VCInstallDir\\)")
      set(_verbatim "")
    endif()

    if (do_obj_build_rule)
      add_custom_command(
        OUTPUT ${output_file}
        DEPENDS ${object_files}
        # Make sure the output directory exists before trying to write to it.
        COMMAND ${CMAKE_COMMAND} -E make_directory "${output_file_dir}"
        COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} -dlink ${object_files} -o ${output_file}
        ${flags}
        COMMENT "Building NVCC intermediate link file ${output_file_relative_path}"
        ${_verbatim}
        )
    else()
      add_custom_command(
        TARGET ${cuda_target}
        PRE_LINK
        COMMAND ${CMAKE_COMMAND} -E echo "Building NVCC intermediate link file ${output_file_relative_path}"
        COMMAND ${CMAKE_COMMAND} -E make_directory "${output_file_dir}"
        COMMAND ${CUDA_NVCC_EXECUTABLE} ${nvcc_flags} ${flags} -dlink ${object_files} -o "${output_file}"
        ${_verbatim}
        )
    endif()
  endif()
endfunction()
1709
1710 ###############################################################################
1711 ###############################################################################
1712 # ADD LIBRARY
1713 ###############################################################################
1714 ###############################################################################
# CUDA_ADD_LIBRARY(<cuda_target> [args...])
#
# Wraps the given sources with CUDA_WRAP_SRCS (OBJ format), adds a library
# target from the generated files and the sources, wires up the separable
# compilation link step and links the target against ${CUDA_LIBRARIES}.
# This is a macro, so the helper variables it sets stay visible to the caller.
macro(CUDA_ADD_LIBRARY cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Split ARGN into sources, CMake options and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})
  CUDA_BUILD_SHARED_LIBRARY(_cuda_shared_flag ${ARGN})

  # Create the custom commands and targets for each file.
  CUDA_WRAP_SRCS(${cuda_target} OBJ _generated_files
    ${_sources}
    ${_cmake_options}
    ${_cuda_shared_flag}
    OPTIONS ${_options})

  # Name of the intermediate link file used for separable compilation.
  CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(
    link_file
    ${cuda_target}
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # Add the library.
  add_library(${cuda_target} ${_cmake_options} ${_generated_files} ${_sources} ${link_file})

  # Add the link phase for separable compilation.  When it is enabled the
  # ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS variable has been defined.
  CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS(
    "${link_file}"
    ${cuda_target}
    "${_options}"
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  target_link_libraries(${cuda_target} ${CUDA_LIBRARIES})

  # Separable compilation additionally links ${CUDA_cudadevrt_LIBRARY}.
  if(CUDA_SEPARABLE_COMPILATION)
    target_link_libraries(${cuda_target} ${CUDA_cudadevrt_LIBRARY})
  endif()

  # The linker language follows the expected type of the generated files.
  # CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()
1761
1762
1763 ###############################################################################
1764 ###############################################################################
1765 # ADD EXECUTABLE
1766 ###############################################################################
1767 ###############################################################################
# CUDA_ADD_EXECUTABLE(<cuda_target> [args...])
#
# Wraps the given sources with CUDA_WRAP_SRCS (OBJ format), adds an executable
# target from the generated files and the sources, wires up the separable
# compilation link step and links the target against ${CUDA_LIBRARIES}.
# This is a macro, so the helper variables it sets stay visible to the caller.
macro(CUDA_ADD_EXECUTABLE cuda_target)

  CUDA_ADD_CUDA_INCLUDE_ONCE()

  # Split ARGN into sources, CMake options and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})

  # Create the custom commands and targets for each file.
  CUDA_WRAP_SRCS(${cuda_target} OBJ _generated_files
    ${_sources}
    OPTIONS ${_options})

  # Name of the intermediate link file used for separable compilation.
  CUDA_COMPUTE_SEPARABLE_COMPILATION_OBJECT_FILE_NAME(
    link_file
    ${cuda_target}
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  # Add the executable.
  add_executable(${cuda_target} ${_cmake_options} ${_generated_files} ${_sources} ${link_file})

  # Add the link phase for separable compilation.  When it is enabled the
  # ${cuda_target}_SEPARABLE_COMPILATION_OBJECTS variable has been defined.
  CUDA_LINK_SEPARABLE_COMPILATION_OBJECTS(
    "${link_file}"
    ${cuda_target}
    "${_options}"
    "${${cuda_target}_SEPARABLE_COMPILATION_OBJECTS}")

  target_link_libraries(${cuda_target} ${CUDA_LIBRARIES})

  # The linker language follows the expected type of the generated files.
  # CUDA_C_OR_CXX is computed based on CUDA_HOST_COMPILATION_CPP.
  set_target_properties(${cuda_target} PROPERTIES LINKER_LANGUAGE ${CUDA_C_OR_CXX})

endmacro()
1805
1806
1807 ###############################################################################
1808 ###############################################################################
1809 # (Internal) helper for manually added cuda source files with specific targets
1810 ###############################################################################
1811 ###############################################################################
# cuda_compile_base(<cuda_target> <format> <generated_files> [args...])
#
# Shared implementation of the CUDA_COMPILE* macros.
#
#   cuda_target     - base name of the phony target; a per-directory counter is
#                     appended so every call gets a unique name.
#   format          - format token forwarded to CUDA_WRAP_SRCS.
#   generated_files - name of the variable that receives the list of generated
#                     files.
#   ARGN            - sources and options, split by
#                     CUDA_GET_SOURCES_AND_OPTIONS.
macro(cuda_compile_base cuda_target format generated_files)
  # Advance the counter stored on this directory; it starts at 1.
  get_property(_counter DIRECTORY PROPERTY _cuda_internal_phony_counter)
  if(NOT _counter)
    set(_counter 0)
  endif()
  math(EXPR _counter "${_counter} + 1")
  set_property(DIRECTORY PROPERTY _cuda_internal_phony_counter ${_counter})

  # Unique phony target name for this invocation.
  set(_cuda_target "${cuda_target}_${_counter}")

  # Split ARGN into sources, CMake options and nvcc options.
  CUDA_GET_SOURCES_AND_OPTIONS(_sources _cmake_options _options ${ARGN})

  # Create the custom commands and targets for each file.
  CUDA_WRAP_SRCS(${_cuda_target} ${format} _generated_files
    ${_sources}
    ${_cmake_options}
    OPTIONS ${_options}
    PHONY)

  # Hand the generated file list back through the caller supplied variable.
  set(${generated_files} ${_generated_files})

endmacro()
1834
1835 ###############################################################################
1836 ###############################################################################
1837 # CUDA COMPILE
1838 ###############################################################################
1839 ###############################################################################
# CUDA_COMPILE(<generated_files> [args...])
#
# Forwards to cuda_compile_base with the target base name "cuda_compile" and
# the OBJ format; <generated_files> names the variable receiving the result.
macro(CUDA_COMPILE generated_files)
  cuda_compile_base(
    cuda_compile
    OBJ
    ${generated_files}
    ${ARGN})
endmacro()
1843
1844 ###############################################################################
1845 ###############################################################################
1846 # CUDA COMPILE PTX
1847 ###############################################################################
1848 ###############################################################################
# CUDA_COMPILE_PTX(<generated_files> [args...])
#
# Forwards to cuda_compile_base with the target base name "cuda_compile_ptx"
# and the PTX format; <generated_files> names the variable receiving the result.
macro(CUDA_COMPILE_PTX generated_files)
  cuda_compile_base(
    cuda_compile_ptx
    PTX
    ${generated_files}
    ${ARGN})
endmacro()
1852
1853 ###############################################################################
1854 ###############################################################################
1855 # CUDA COMPILE FATBIN
1856 ###############################################################################
1857 ###############################################################################
# CUDA_COMPILE_FATBIN(<generated_files> [args...])
#
# Forwards to cuda_compile_base with the target base name
# "cuda_compile_fatbin" and the FATBIN format; <generated_files> names the
# variable receiving the result.
macro(CUDA_COMPILE_FATBIN generated_files)
  cuda_compile_base(
    cuda_compile_fatbin
    FATBIN
    ${generated_files}
    ${ARGN})
endmacro()
1861
1862 ###############################################################################
1863 ###############################################################################
1864 # CUDA COMPILE CUBIN
1865 ###############################################################################
1866 ###############################################################################
# CUDA_COMPILE_CUBIN(<generated_files> [args...])
#
# Forwards to cuda_compile_base with the target base name "cuda_compile_cubin"
# and the CUBIN format; <generated_files> names the variable receiving the
# result.
macro(CUDA_COMPILE_CUBIN generated_files)
  cuda_compile_base(
    cuda_compile_cubin
    CUBIN
    ${generated_files}
    ${ARGN})
endmacro()
1870
1871
1872 ###############################################################################
1873 ###############################################################################
1874 # CUDA ADD CUFFT TO TARGET
1875 ###############################################################################
1876 ###############################################################################
# CUDA_ADD_CUFFT_TO_TARGET(<target>)
#
# Links <target> against ${CUDA_cufft_LIBRARY}, or against
# ${CUDA_cufftemu_LIBRARY} when CUDA_BUILD_EMULATION is enabled.
macro(CUDA_ADD_CUFFT_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target} ${CUDA_cufft_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cufftemu_LIBRARY})
  endif()
endmacro()
1884
1885 ###############################################################################
1886 ###############################################################################
1887 # CUDA ADD CUBLAS TO TARGET
1888 ###############################################################################
1889 ###############################################################################
# CUDA_ADD_CUBLAS_TO_TARGET(<target>)
#
# Links <target> against ${CUDA_cublas_LIBRARY} and
# ${CUDA_cublas_device_LIBRARY}, or against ${CUDA_cublasemu_LIBRARY} alone
# when CUDA_BUILD_EMULATION is enabled.
macro(CUDA_ADD_CUBLAS_TO_TARGET target)
  if(NOT CUDA_BUILD_EMULATION)
    target_link_libraries(${target}
      ${CUDA_cublas_LIBRARY}
      ${CUDA_cublas_device_LIBRARY})
  else()
    target_link_libraries(${target} ${CUDA_cublasemu_LIBRARY})
  endif()
endmacro()
1897
1898 ###############################################################################
1899 ###############################################################################
1900 # CUDA BUILD CLEAN TARGET
1901 ###############################################################################
1902 ###############################################################################
# CUDA_BUILD_CLEAN_TARGET()
#
# Adds a custom target named clean_cuda_depends (upper-cased for Visual Studio
# generators) that removes every file listed in CUDA_ADDITIONAL_CLEAN_FILES,
# then resets that cache variable.
macro(CUDA_BUILD_CLEAN_TARGET)
  # Call this after you add all your CUDA targets, and you will get a
  # convenience target.  You should also make clean after running this target
  # to get the build system to generate all the code again.

  set(cuda_clean_target_name clean_cuda_depends)
  if (CMAKE_GENERATOR MATCHES "Visual Studio")
    string(TOUPPER ${cuda_clean_target_name} cuda_clean_target_name)
  endif()
  # -f keeps the target from failing when a listed file does not exist (for
  # example before the first build or when the target is run twice).
  add_custom_target(${cuda_clean_target_name}
    COMMAND ${CMAKE_COMMAND} -E remove -f ${CUDA_ADDITIONAL_CLEAN_FILES})

  # Clear out the variable, so the next time we configure it will be empty.
  # This is useful so that the files won't persist in the list after targets
  # have been removed.
  set(CUDA_ADDITIONAL_CLEAN_FILES "" CACHE INTERNAL "List of intermediate files that are part of the cuda dependency scanning.")
endmacro()