This makes it easier to understand nvprof timelines.
CUDA_SELECT_NVCC_ARCH_FLAGS(ARCH_FLAGS ${CUDA_ARCH_LIST})
list( APPEND CUDA_NVCC_FLAGS -O3 -std=c++11 ${ARCH_FLAGS} --default-stream per-thread) # --gpu-architecture sm_62 )
find_cuda_helper_libs(cufftw)
+ find_cuda_helper_libs(nvToolsExt)
ENDIF()
add_subdirectory(piotr_fhog)
ENDIF() #cuFFTW
IF(FFT STREQUAL "cuFFT")
- target_link_libraries(kcf ${CUDA_cufft_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_LIBRARIES})
+ target_link_libraries(kcf ${CUDA_cufft_LIBRARY} ${CUDA_cublas_LIBRARY} ${CUDA_LIBRARIES} ${CUDA_nvToolsExt_LIBRARY})
ENDIF()
IF(PROFILING)
#ifdef CUFFT
#include <cufft.h>
+#include "nvToolsExt.h"
#endif
public:
FTrace(DbgTracer &dt, const char *fn, const char *format, ...) : t(dt), funcName(fn)
{
+#ifdef CUFFT
+ nvtxRangePushA(fn);
+#endif
if (!t.debug) return;
char *arg;
va_list vl;
}
~FTrace()
{
+#ifdef CUFFT
+ nvtxRangePop();
+#endif
if (!t.debug) return;
t.indentLvl--;
std::cerr << t.indent() << "}" << std::endl;