]> rtime.felk.cvut.cz Git - hercules2020/hercules-compiler.git/commitdiff
Configure HerculesCompiler via CMake rather than by editing Options.h
authorMichal Sojka <michal.sojka@cvut.cz>
Tue, 20 Nov 2018 09:13:52 +0000 (10:13 +0100)
committerMichal Sojka <michal.sojka@cvut.cz>
Tue, 20 Nov 2018 22:36:44 +0000 (23:36 +0100)
We will update the top-level Makefile to take this into account in the
next commits.

debian/patches/configuration.patch [deleted file]
debian/patches/configure-herculescompiler-via-cmake.patch [new file with mode: 0644]
debian/patches/series

diff --git a/debian/patches/configuration.patch b/debian/patches/configuration.patch
deleted file mode 100644 (file)
index 89054c0..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-Description: Configuration for TX2 according to M24CompilerRuntime.docx
-
---- a/HerculesCompiler/llvm-passes/include/Config/Options.h
-+++ b/HerculesCompiler/llvm-passes/include/Config/Options.h
-@@ -41,22 +41,22 @@
- //#define NEVER_INLINE_SYNC
- // Use hardware cache prefetches in Specialized function, in place of SPM-based.
--//#define USE_HW_CACHES
-+#define USE_HW_CACHES
- // If we use hardware caches, we can either use load or prefetch instructions
- // to bring the data local.
--//#define USE_HW_CACHES_PREFETCH
-+#define USE_HW_CACHES_PREFETCH
- //#define USE_HW_CACHES_INLINEPTX_PREFETCH
- //#define USE_HW_CACHES_LIBCALL
- //#define USE_HW_CACHES_VOLALOAD
- // Use a single writeback function when using caches
- //#define USE_HW_CACHES_SINGLEWRITEBACK
--//#define USE_HW_CACHES_INDWRITEBACK_LIBCALL
-+#define USE_HW_CACHES_INDWRITEBACK_LIBCALL
- //#define USE_HW_CACHES_INDWRITEBACK_INLINE
--//#define DONT_SPECIALIZE_EXECUTE
--//#define PREFETCH_REPS 1
-+#define DONT_SPECIALIZE_EXECUTE
-+#define PREFETCH_REPS 1
- // Allow exporting of PREMized functions through external linkage (useful for
- // compiling PREMized library functions).
---- a/clang/lib/Driver/ToolChains.cpp
-+++ b/clang/lib/Driver/ToolChains.cpp
-@@ -4967,7 +4967,7 @@
- // macro for it. Also, select the default PTX version to be used. We use 4.2 for
- // compute capabilities older than 6.0 and 5.0 otherwise.
- #ifndef OPENMP_NVPTX_COMPUTE_CAPABILITY
--#define OPENMP_NVPTX_COMPUTE_CAPABILITY 53
-+#define OPENMP_NVPTX_COMPUTE_CAPABILITY 62
- #endif
- #if OPENMP_NVPTX_COMPUTE_CAPABILITY < 60
---- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
-+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
-@@ -66,7 +66,7 @@
-       set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
-     endforeach()
-   else()
--    set(CUDA_ARCH -arch sm_35)
-+    set(CUDA_ARCH -arch sm_62)
-   endif()
-   
-   # Activate RTL message dumps if requested by the user.
-@@ -176,7 +176,7 @@
-           set(CUDA_ARCH ${CUDA_ARCH} --cuda-gpu-arch=sm_${sm})
-         endforeach()
-       else()
--        set(CUDA_ARCH --cuda-gpu-arch=sm_35)
-+        set(CUDA_ARCH --cuda-gpu-arch=sm_62)
-       endif()
-       # Compile cuda files to bitcode.
diff --git a/debian/patches/configure-herculescompiler-via-cmake.patch b/debian/patches/configure-herculescompiler-via-cmake.patch
new file mode 100644 (file)
index 0000000..fff6fc5
--- /dev/null
@@ -0,0 +1,392 @@
+--- a/HerculesCompiler/llvm-passes/CMakeLists.txt
++++ b/HerculesCompiler/llvm-passes/CMakeLists.txt
+@@ -84,6 +84,88 @@
+ MESSAGE( STATUS "PROJECTS_MAIN_SRC_DIR = " ${PROJECTS_MAIN_SRC_DIR} )
+ MESSAGE( STATUS "PROJECTS_MAIN_INCLUDE_DIR = " ${PROJECTS_MAIN_INCLUDE_DIR} )
+-include_directories(${PROJECTS_MAIN_INCLUDE_DIR})
++include_directories(${PROJECTS_MAIN_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/include)
++
++include(CMakeDependentOption)
++
++# // ----------------------------
++# // ---- CORE CONFIGURATION ----
++# // ----------------------------
++
++# // Amount of bytes available in the SPM or cache
++# // SPM (TX1, TX2) = 48000
++# // CACHE TX1 CPU  = 524288
++# // CACHE TX1 GPU  = 262144
++# // CACHE TX2 GPU  = 524288
++# // CACHE TX2 CPU  = 524288
++set(CPU_CACHE_SIZE_DEFAULT 524288 CACHE STRING "CPU cache size")
++set(GPU_CACHE_SIZE_DEFAULT 524288 CACHE STRING "GPU cache size")
++set(GPU_SCRATCHPAD_SIZE_DEFAULT 48000 CACHE STRING "GPU scratchpad size")
++
++
++option(USE_HW_CACHES "Use hardware cache prefetches in Specialized function, in place of SPM-based" OFF)
++option(HIERARCHICAL_INTERVALS "Use hierarchical PREM intervals for loops" ON)
++
++# // -----------------------------
++# // ---- CACHE CONFIGURATION ----
++# // -----------------------------
++
++# // If we use hardware caches, we can either use load or prefetch instructions
++# // to bring the data local.
++set(PREFETCH_REPS 1 CACHE STRING "")
++cmake_dependent_option(USE_HW_CACHES_PREFETCH
++  "Use prefetch instructions" ON "USE_HW_CACHES" ON)
++cmake_dependent_option(USE_HW_CACHES_INLINEPTX_PREFETCH
++  "Use INLINEPTX prefetch"  OFF "USE_HW_CACHES" OFF)
++cmake_dependent_option(USE_HW_CACHES_LIBCALL
++  "Prefetch via library call"  OFF "USE_HW_CACHES" OFF)
++cmake_dependent_option(USE_HW_CACHES_VOLALOAD
++  "Prefetch via load instructions"  OFF "USE_HW_CACHES" OFF)
++
++# // Use a single writeback function when using caches
++cmake_dependent_option(USE_HW_CACHES_SINGLEWRITEBACK
++  ""  OFF "USE_HW_CACHES" OFF)
++cmake_dependent_option(USE_HW_CACHES_INDWRITEBACK_LIBCALL
++  ""  ON "USE_HW_CACHES" ON)
++cmake_dependent_option(USE_HW_CACHES_INDWRITEBACK_INLINE
++  ""  OFF "USE_HW_CACHES" OFF)
++
++# // ---------------------------------------
++# // ---- CODE GENERATION CONFIGURATION ----
++# // ---------------------------------------
++
++# // Allow exporting of PREMized functions through external linkage (useful for
++# // compiling PREMized library functions).
++option(ULES_EXTERNAL_LINKAGE "Allow exporting of PREMized functions through external linkage (useful for compiling PREMized library functions)"
++  OFF)
++
++# // Inlining of specialized functions
++option(ALWAYS_INLINE_UNSPECIALIZED "" ON)
++option(ALWAYS_INLINE_LOAD "" ON)
++option(ALWAYS_INLINE_EXECUTE "" ON)
++option(ALWAYS_INLINE_STORE "" ON)
++
++option(AGGRESSIVELY_INLINE_CALL_TREE "Inlining of call trees in Channel Arg Insertion" ON)
++
++# // -------------------------------------------------
++# // ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ----
++# // -------------------------------------------------
++
++option(USE_PREM_INIT_FINI "Synchronization" ON)
++
++option(NEVER_USE_SOFTDMA "Use the new DMA-like loading of data for Load and/or Store phases" ON)
++
++# // Scheduling
++option(SCHEDULE_COMPATIBLE "" OFF)
++option(SCHEDULE_LES "" ON)
++option(SCHEDULE_COMBINED "" OFF)
++
++set(NUM_THREADS_LES_IN_COMBINED 512 CACHE STRING "COMBINED schedule only: Number of threads to use for Specialized")
++
++set(CACHE_LINE_SIZE 128 CACHE STRING "The size in bytes of a cache line (128 bytes on TX1)")
++
++option(NEVER_INLINE_SYNC "Inlining of synchronization functions" OFF)
++
++configure_file(include/Config/Options.h.in include/Config/Options.h)
+ add_subdirectory(src)
+--- a/HerculesCompiler/llvm-passes/include/Config/Options.h
++++ /dev/null
+@@ -1,147 +0,0 @@
+-
+-// ----------------------------
+-// ---- CORE CONFIGURATION ----
+-// ----------------------------
+-
+-// Amount of bytes available in the SPM or cache
+-// SPM (TX1, TX2) = 48000
+-// CACHE TX1 CPU  = 524288
+-// CACHE TX1 GPU  = 262144
+-// CACHE TX2 GPU  = 524288
+-// CACHE TX2 CPU  = 524288
+-#define CPU_CACHE_SIZE_DEFAULT  524288
+-#define GPU_CACHE_SIZE_DEFAULT  524288
+-#define GPU_SCRATCHPAD_SIZE_DEFAULT 48000
+-
+-// Use hardware cache prefetches in Specialized function, in place of SPM-based.
+-//#define USE_HW_CACHES
+-
+-// Use hierarchical PREM intervals for loops.
+-#define HIERARCHICAL_INTERVALS
+-
+-// -----------------------------
+-// ---- CACHE CONFIGURATION ----
+-// -----------------------------
+-
+-// If we use hardware caches, we can either use load or prefetch instructions
+-// to bring the data local.
+-#define PREFETCH_REPS 1
+-#define USE_HW_CACHES_PREFETCH
+-//#define USE_HW_CACHES_INLINEPTX_PREFETCH
+-//#define USE_HW_CACHES_LIBCALL
+-//#define USE_HW_CACHES_VOLALOAD
+-
+-// Use a single writeback function when using caches
+-//#define USE_HW_CACHES_SINGLEWRITEBACK
+-#define USE_HW_CACHES_INDWRITEBACK_LIBCALL
+-//#define USE_HW_CACHES_INDWRITEBACK_INLINE
+-
+-// ---------------------------------------
+-// ---- CODE GENERATION CONFIGURATION ----
+-// ---------------------------------------
+-
+-// Allow exporting of PREMized functions through external linkage (useful for
+-// compiling PREMized library functions).
+-//#define ULES_EXTERNAL_LINKAGE
+-
+-// Inlining of specialized functions
+-#define ALWAYS_INLINE_UNSPECIALIZED
+-#define ALWAYS_INLINE_LOAD
+-#define ALWAYS_INLINE_EXECUTE
+-#define ALWAYS_INLINE_STORE
+-
+-// Inlining of call trees in Channel Arg Insertion
+-#define AGGRESSIVELY_INLINE_CALL_TREE
+-
+-// -------------------------------------------------
+-// ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ----
+-// -------------------------------------------------
+-
+-// Synchronization
+-#define USE_PREM_INIT_FINI
+-
+-// Use the new DMA-like loading of data for Load and/or Store phases
+-#define NEVER_USE_SOFTDMA
+-
+-// Scheduling
+-//#define SCHEDULE_COMPATIBLE
+-#define SCHEDULE_LES
+-//#define SCHEDULE_COMBINED
+-
+-// COMBINED schedule only: Number of threads to use for Specialized
+-#define NUM_THREADS_LES_IN_COMBINED 512
+-
+-// The size in bytes of a cache line (128 bytes on TX1)
+-#define CACHE_LINE_SIZE 128
+-
+-// Inlining of synchronization functions
+-//#define NEVER_INLINE_SYNC
+-
+-// -------------------------------------------------------------------------- //
+-// ---- DO NOT CHANGE BELOW THIS LINE ---- AUTOGENERATION BASED ON ABOVE ---- //
+-// -------------------------------------------------------------------------- //
+-
+-// Check that schedule is sane.
+-#if defined(SCHEDULE_COMPATIBLE) &&                                            \
+-    (defined(SCHEDULE_LES) || defined(SCHEDULE_COMBINED))
+-#error Multiple schedules defined!
+-#endif
+-#if defined(SCHEDULE_LES) &&                                                   \
+-    (defined(SCHEDULE_COMPATIBLE) || defined(SCHEDULE_COMBINED))
+-#error Multiple schedules defined!
+-#endif
+-#if defined(SCHEDULE_COMBINED) &&                                              \
+-    (defined(SCHEDULE_LES) || defined(SCHEDULE_COMPATIBLE))
+-#error Multiple schedules defined!
+-#endif
+-#if !defined(SCHEDULE_COMPATIBLE) && !defined(SCHEDULE_LES) &&                 \
+-    !defined(SCHEDULE_COMBINED)
+-#error No schedule defined!
+-#endif
+-
+-// Check that the HW CACHE config is sane.
+-#ifdef USE_HW_CACHES
+-#define DONT_SPECIALIZE_EXECUTE
+-//#  if defined(USE_HW_CACHES_PREFETCH) && defined(USE_HW_CACHES_VOLALOAD)
+-//#    error Using both volatile loads and prefetches for HW CACHE mode.
+-//#  endif
+-#if !defined(USE_HW_CACHES_PREFETCH) && !defined(USE_HW_CACHES_VOLALOAD) &&    \
+-    !defined(USE_HW_CACHES_INLINEPTX_PREFETCH) &&                              \
+-    !defined(USE_HW_CACHES_LIBCALL)
+-#error No policy for HW caches defined!
+-#endif
+-#if defined(USE_HW_CACHES_INLINEPTX_PREFETCH) &&                               \
+-    (defined(USE_HW_CACHES_PREFETCH) || defined(USE_HW_CACHES_VOLALOAD))
+-#error Multiple cache policies defined!
+-#endif
+-#if defined(USE_HW_CACHES_PREFETCH) &&                                         \
+-    (defined(USE_HW_CACHES_INLINEPTX_PREFETCH) ||                              \
+-     defined(USE_HW_CACHES_VOLALOAD))
+-#error Multiple cache policies defined!
+-#endif
+-#if defined(USE_HW_CACHES_VOLALOAD) &&                                         \
+-    (defined(USE_HW_CACHES_PREFETCH) ||                                        \
+-     defined(USE_HW_CACHES_INLINEPTX_PREFETCH))
+-#error Multiple cache policies defined!
+-#endif
+-
+-#if defined(USE_HW_CACHES_SINGLEWRITEBACK) &&                                  \
+-    defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) &&                             \
+-    defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
+-#error Multiple cache writeback policies!
+-#elif defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) &&                           \
+-    defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
+-#error Multiple cache writeback policies!
+-#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) &&                                \
+-    defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
+-#error Multiple cache writeback policies!
+-#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) &&                                \
+-    defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL)
+-#error Multiple cache writeback policies!
+-#endif
+-#endif
+-
+-// Warning
+-#ifdef EMPTY_COMPUTE_PHASE
+-#warning Empty compute phase!
+-#endif
+--- /dev/null
++++ b/HerculesCompiler/llvm-passes/include/Config/Options.h.in
+@@ -0,0 +1,147 @@
++
++// ----------------------------
++// ---- CORE CONFIGURATION ----
++// ----------------------------
++
++// Amount of bytes available in the SPM or cache
++// SPM (TX1, TX2) = 48000
++// CACHE TX1 CPU  = 524288
++// CACHE TX1 GPU  = 262144
++// CACHE TX2 GPU  = 524288
++// CACHE TX2 CPU  = 524288
++#cmakedefine CPU_CACHE_SIZE_DEFAULT @CPU_CACHE_SIZE_DEFAULT@
++#cmakedefine GPU_CACHE_SIZE_DEFAULT @GPU_CACHE_SIZE_DEFAULT@
++#cmakedefine GPU_SCRATCHPAD_SIZE_DEFAULT @GPU_SCRATCHPAD_SIZE_DEFAULT@
++
++// Use hardware cache prefetches in Specialized function, in place of SPM-based.
++#cmakedefine USE_HW_CACHES
++
++// Use hierarchical PREM intervals for loops.
++#cmakedefine HIERARCHICAL_INTERVALS
++
++// -----------------------------
++// ---- CACHE CONFIGURATION ----
++// -----------------------------
++
++// If we use hardware caches, we can either use load or prefetch instructions
++// to bring the data local.
++#cmakedefine PREFETCH_REPS @PREFETCH_REPS@
++#cmakedefine USE_HW_CACHES_PREFETCH
++#cmakedefine USE_HW_CACHES_INLINEPTX_PREFETCH
++#cmakedefine USE_HW_CACHES_LIBCALL
++#cmakedefine USE_HW_CACHES_VOLALOAD
++
++// Use a single writeback function when using caches
++#cmakedefine USE_HW_CACHES_SINGLEWRITEBACK
++#cmakedefine USE_HW_CACHES_INDWRITEBACK_LIBCALL
++#cmakedefine USE_HW_CACHES_INDWRITEBACK_INLINE
++
++// ---------------------------------------
++// ---- CODE GENERATION CONFIGURATION ----
++// ---------------------------------------
++
++// Allow exporting of PREMized functions through external linkage (useful for
++// compiling PREMized library functions).
++#cmakedefine ULES_EXTERNAL_LINKAGE
++
++// Inlining of specialized functions
++#cmakedefine ALWAYS_INLINE_UNSPECIALIZED
++#cmakedefine ALWAYS_INLINE_LOAD
++#cmakedefine ALWAYS_INLINE_EXECUTE
++#cmakedefine ALWAYS_INLINE_STORE
++
++// Inlining of call trees in Channel Arg Insertion
++#cmakedefine AGGRESSIVELY_INLINE_CALL_TREE
++
++// -------------------------------------------------
++// ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ----
++// -------------------------------------------------
++
++// Synchronization
++#cmakedefine USE_PREM_INIT_FINI
++
++// Use the new DMA-like loading of data for Load and/or Store phases
++#cmakedefine NEVER_USE_SOFTDMA
++
++// Scheduling
++#cmakedefine SCHEDULE_COMPATIBLE
++#cmakedefine SCHEDULE_LES
++#cmakedefine SCHEDULE_COMBINED
++
++// COMBINED schedule only: Number of threads to use for Specialized
++#cmakedefine NUM_THREADS_LES_IN_COMBINED @NUM_THREADS_LES_IN_COMBINED@
++
++// The size in bytes of a cache line (128 bytes on TX1)
++#cmakedefine CACHE_LINE_SIZE @CACHE_LINE_SIZE@
++
++// Inlining of synchronization functions
++#cmakedefine NEVER_INLINE_SYNC
++
++// -------------------------------------------------------------------------- //
++// ---- DO NOT CHANGE BELOW THIS LINE ---- AUTOGENERATION BASED ON ABOVE ---- //
++// -------------------------------------------------------------------------- //
++
++// Check that schedule is sane.
++#if defined(SCHEDULE_COMPATIBLE) &&                                            \
++    (defined(SCHEDULE_LES) || defined(SCHEDULE_COMBINED))
++#error Multiple schedules defined!
++#endif
++#if defined(SCHEDULE_LES) &&                                                   \
++    (defined(SCHEDULE_COMPATIBLE) || defined(SCHEDULE_COMBINED))
++#error Multiple schedules defined!
++#endif
++#if defined(SCHEDULE_COMBINED) &&                                              \
++    (defined(SCHEDULE_LES) || defined(SCHEDULE_COMPATIBLE))
++#error Multiple schedules defined!
++#endif
++#if !defined(SCHEDULE_COMPATIBLE) && !defined(SCHEDULE_LES) &&                 \
++    !defined(SCHEDULE_COMBINED)
++#error No schedule defined!
++#endif
++
++// Check that the HW CACHE config is sane.
++#ifdef USE_HW_CACHES
++#define DONT_SPECIALIZE_EXECUTE
++//#  if defined(USE_HW_CACHES_PREFETCH) && defined(USE_HW_CACHES_VOLALOAD)
++//#    error Using both volatile loads and prefetches for HW CACHE mode.
++//#  endif
++#if !defined(USE_HW_CACHES_PREFETCH) && !defined(USE_HW_CACHES_VOLALOAD) &&    \
++    !defined(USE_HW_CACHES_INLINEPTX_PREFETCH) &&                              \
++    !defined(USE_HW_CACHES_LIBCALL)
++#error No policy for HW caches defined!
++#endif
++#if defined(USE_HW_CACHES_INLINEPTX_PREFETCH) &&                               \
++    (defined(USE_HW_CACHES_PREFETCH) || defined(USE_HW_CACHES_VOLALOAD))
++#error Multiple cache policies defined!
++#endif
++#if defined(USE_HW_CACHES_PREFETCH) &&                                         \
++    (defined(USE_HW_CACHES_INLINEPTX_PREFETCH) ||                              \
++     defined(USE_HW_CACHES_VOLALOAD))
++#error Multiple cache policies defined!
++#endif
++#if defined(USE_HW_CACHES_VOLALOAD) &&                                         \
++    (defined(USE_HW_CACHES_PREFETCH) ||                                        \
++     defined(USE_HW_CACHES_INLINEPTX_PREFETCH))
++#error Multiple cache policies defined!
++#endif
++
++#if defined(USE_HW_CACHES_SINGLEWRITEBACK) &&                                  \
++    defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) &&                             \
++    defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
++#error Multiple cache writeback policies!
++#elif defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) &&                           \
++    defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
++#error Multiple cache writeback policies!
++#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) &&                                \
++    defined(USE_HW_CACHES_INDWRITEBACK_INLINE)
++#error Multiple cache writeback policies!
++#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) &&                                \
++    defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL)
++#error Multiple cache writeback policies!
++#endif
++#endif
++
++// Warning
++#ifdef EMPTY_COMPUTE_PHASE
++#warning Empty compute phase!
++#endif
index 434a9e0fbcdacf23b577ae637469c277e728e477..601aa6adc5fabe781855700a28e58eafd2695b5c 100644 (file)
@@ -1,4 +1,4 @@
-configuration.patch
+libpremnotify-makefile.patch
 Guard-architecture-dependent-code-with-#ifdefs.patch
 require-cuda.patch
-libpremnotify-makefile.patch
+configure-herculescompiler-via-cmake.patch