Description: Configuration for TX2 according to M24CompilerRuntime.docx
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/include/Config/Options.h
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/include/Config/Options.h
-@@ -39,11 +39,11 @@
- //#define AGGRESSIVELY_INLINE_CALL_TREE
+Index: hercules-compiler/HerculesCompiler/llvm-passes/include/Config/Options.h
+===================================================================
+--- hercules-compiler.orig/HerculesCompiler/llvm-passes/include/Config/Options.h
++++ hercules-compiler/HerculesCompiler/llvm-passes/include/Config/Options.h
+@@ -41,11 +41,11 @@
+ //#define NEVER_INLINE_SYNC
// Use hardware cache prefetches in Specialized function, in place of SPM-based.
-//#define USE_HW_CACHES
// to bring the data local.
-//#define USE_HW_CACHES_PREFETCH
+#define USE_HW_CACHES_PREFETCH
+ //#define USE_HW_CACHES_INLINEPTX_PREFETCH
+ //#define USE_HW_CACHES_LIBCALL
//#define USE_HW_CACHES_VOLALOAD
-
-
---- hercules-compiler-2017.11.orig/clang/lib/Driver/ToolChains.cpp
-+++ hercules-compiler-2017.11/clang/lib/Driver/ToolChains.cpp
+Index: hercules-compiler/clang/lib/Driver/ToolChains.cpp
+===================================================================
+--- hercules-compiler.orig/clang/lib/Driver/ToolChains.cpp
++++ hercules-compiler/clang/lib/Driver/ToolChains.cpp
@@ -4967,7 +4967,7 @@ Tool *DragonFly::buildLinker() const {
// macro for it. Also, select the default PTX version to be used. We use 4.2 for
// compute capabilities older than 6.0 and 5.0 otherwise.
#endif
#if OPENMP_NVPTX_COMPUTE_CAPABILITY < 60
---- hercules-compiler-2017.11.orig/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
-+++ hercules-compiler-2017.11/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+Index: hercules-compiler/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+===================================================================
+--- hercules-compiler.orig/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
++++ hercules-compiler/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
@@ -66,7 +66,7 @@ if(LIBOMPTARGET_DEP_CUDA_FOUND)
set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
endforeach()
+++ /dev/null
-Description: Allow installing the Hercules passes
-
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/AnnotatePTXGlobals/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/AnnotatePTXGlobals/CMakeLists.txt
-@@ -7,3 +7,4 @@ add_library(AnnotatePTXGlobals
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Macros.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/PassOptions.h )
-
-+install(TARGETS AnnotatePTXGlobals LIBRARY DESTINATION lib/hercules/AnnotatePTXGlobals)
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/ChannelArgInsertion/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/ChannelArgInsertion/CMakeLists.txt
-@@ -7,3 +7,5 @@ add_library(ChannelArgInsertion
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Constants.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/PassOptions.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Macros.h )
-+
-+install(TARGETS ChannelArgInsertion LIBRARY DESTINATION lib/hercules/ChannelArgInsertion)
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/LoopChunk/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/LoopChunk/CMakeLists.txt
-@@ -6,4 +6,4 @@ add_library(LoopChunk SHARED LoopChunk.c
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/PassOptions.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Macros.h
- )
--
-+install(TARGETS LoopChunk LIBRARY DESTINATION lib/hercules/LoopChunk)
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/LoopExtract/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/LoopExtract/CMakeLists.txt
-@@ -8,3 +8,5 @@ add_library(LoopExtract MODULE LoopExtra
-
- get_property(MODULE_FILE TARGET LoopExtract PROPERTY LOCATION)
- #configure_file(run.sh.in run.sh @ONLY)
-+
-+install(TARGETS LoopExtract LIBRARY DESTINATION lib/hercules/LoopExtract)
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/MarkLoopsToTransform/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/MarkLoopsToTransform/CMakeLists.txt
-@@ -9,3 +9,4 @@ add_library(MarkLoopsToTransform
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/PassOptions.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Macros.h )
-
-+install(TARGETS MarkLoopsToTransform LIBRARY DESTINATION lib/hercules/MarkLoopsToTransform)
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/WarpSeparate/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/WarpSeparate/CMakeLists.txt
-@@ -9,3 +9,4 @@ add_library(WarpSeparate
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/PassOptions.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Macros.h )
-
-+install(TARGETS WarpSeparate LIBRARY DESTINATION lib/hercules/WarpSeparate)
---- hercules-compiler-2017.11.orig/HerculesCompiler/llvm-passes/src/Passes/WarpSpecialize/CMakeLists.txt
-+++ hercules-compiler-2017.11/HerculesCompiler/llvm-passes/src/Passes/WarpSpecialize/CMakeLists.txt
-@@ -9,3 +9,4 @@ add_library(WarpSpecialize
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/PassOptions.h
- ${PROJECTS_MAIN_INCLUDE_DIR}/Config/Macros.h )
-
-+install(TARGETS WarpSpecialize LIBRARY DESTINATION lib/hercules/WarpSpecialize)