From: Michal Sojka Date: Tue, 20 Nov 2018 09:13:52 +0000 (+0100) Subject: Configure HerculesCompiler via CMake rather than by editing Options.h X-Git-Tag: debian/2018.11-2~8 X-Git-Url: https://rtime.felk.cvut.cz/gitweb/hercules2020/hercules-compiler.git/commitdiff_plain/f32779e331a255943ce0db0f92acacbd18b35297 Configure HerculesCompiler via CMake rather than by editing Options.h We will update the top-level Makefile to take this into account in the next commits. --- diff --git a/debian/patches/configuration.patch b/debian/patches/configuration.patch deleted file mode 100644 index 89054c0..0000000 --- a/debian/patches/configuration.patch +++ /dev/null @@ -1,63 +0,0 @@ -Description: Configuration for TX2 according to M24CompilerRuntime.docx - ---- a/HerculesCompiler/llvm-passes/include/Config/Options.h -+++ b/HerculesCompiler/llvm-passes/include/Config/Options.h -@@ -41,22 +41,22 @@ - //#define NEVER_INLINE_SYNC - - // Use hardware cache prefetches in Specialized function, in place of SPM-based. --//#define USE_HW_CACHES -+#define USE_HW_CACHES - - // If we use hardware caches, we can either use load or prefetch instructions - // to bring the data local. --//#define USE_HW_CACHES_PREFETCH -+#define USE_HW_CACHES_PREFETCH - //#define USE_HW_CACHES_INLINEPTX_PREFETCH - //#define USE_HW_CACHES_LIBCALL - //#define USE_HW_CACHES_VOLALOAD - - // Use a single writeback function when using caches - //#define USE_HW_CACHES_SINGLEWRITEBACK --//#define USE_HW_CACHES_INDWRITEBACK_LIBCALL -+#define USE_HW_CACHES_INDWRITEBACK_LIBCALL - //#define USE_HW_CACHES_INDWRITEBACK_INLINE - --//#define DONT_SPECIALIZE_EXECUTE --//#define PREFETCH_REPS 1 -+#define DONT_SPECIALIZE_EXECUTE -+#define PREFETCH_REPS 1 - - // Allow exporting of PREMized functions through external linkage (useful for - // compiling PREMized library functions). ---- a/clang/lib/Driver/ToolChains.cpp -+++ b/clang/lib/Driver/ToolChains.cpp -@@ -4967,7 +4967,7 @@ - // macro for it. 
Also, select the default PTX version to be used. We use 4.2 for - // compute capabilities older than 6.0 and 5.0 otherwise. - #ifndef OPENMP_NVPTX_COMPUTE_CAPABILITY --#define OPENMP_NVPTX_COMPUTE_CAPABILITY 53 -+#define OPENMP_NVPTX_COMPUTE_CAPABILITY 62 - #endif - - #if OPENMP_NVPTX_COMPUTE_CAPABILITY < 60 ---- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt -+++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt -@@ -66,7 +66,7 @@ - set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm}) - endforeach() - else() -- set(CUDA_ARCH -arch sm_35) -+ set(CUDA_ARCH -arch sm_62) - endif() - - # Activate RTL message dumps if requested by the user. -@@ -176,7 +176,7 @@ - set(CUDA_ARCH ${CUDA_ARCH} --cuda-gpu-arch=sm_${sm}) - endforeach() - else() -- set(CUDA_ARCH --cuda-gpu-arch=sm_35) -+ set(CUDA_ARCH --cuda-gpu-arch=sm_62) - endif() - - # Compile cuda files to bitcode. diff --git a/debian/patches/configure-herculescompiler-via-cmake.patch b/debian/patches/configure-herculescompiler-via-cmake.patch new file mode 100644 index 0000000..fff6fc5 --- /dev/null +++ b/debian/patches/configure-herculescompiler-via-cmake.patch @@ -0,0 +1,392 @@ +--- a/HerculesCompiler/llvm-passes/CMakeLists.txt ++++ b/HerculesCompiler/llvm-passes/CMakeLists.txt +@@ -84,6 +84,88 @@ + MESSAGE( STATUS "PROJECTS_MAIN_SRC_DIR = " ${PROJECTS_MAIN_SRC_DIR} ) + MESSAGE( STATUS "PROJECTS_MAIN_INCLUDE_DIR = " ${PROJECTS_MAIN_INCLUDE_DIR} ) + +-include_directories(${PROJECTS_MAIN_INCLUDE_DIR}) ++include_directories(${PROJECTS_MAIN_INCLUDE_DIR} ${CMAKE_CURRENT_BINARY_DIR}/include) ++ ++include(CMakeDependentOption) ++ ++# // ---------------------------- ++# // ---- CORE CONFIGURATION ---- ++# // ---------------------------- ++ ++# // Amount of bytes available in the SPM or cache ++# // SPM (TX1, TX2) = 48000 ++# // CACHE TX1 CPU = 524288 ++# // CACHE TX1 GPU = 262144 ++# // CACHE TX2 GPU = 524288 ++# // CACHE TX2 CPU = 524288 ++set(CPU_CACHE_SIZE_DEFAULT 524288 CACHE STRING "CPU cache 
size") ++set(GPU_CACHE_SIZE_DEFAULT 524288 CACHE STRING "GPU cache size") ++set(GPU_SCRATCHPAD_SIZE_DEFAULT 48000 CACHE STRING "GPU scratchpad size") ++ ++ ++option(USE_HW_CACHES "Use hardware cache prefetches in Specialized function, in place of SPM-based" OFF) ++option(HIERARCHICAL_INTERVALS "Use hierarchical PREM intervals for loops" ON) ++ ++# // ----------------------------- ++# // ---- CACHE CONFIGURATION ---- ++# // ----------------------------- ++ ++# // If we use hardware caches, we can either use load or prefetch instructions ++# // to bring the data local. ++set(PREFETCH_REPS 1 CACHE STRING "") ++cmake_dependent_option(USE_HW_CACHES_PREFETCH ++ "Use prefetch instructions" ON "USE_HW_CACHES" ON) ++cmake_dependent_option(USE_HW_CACHES_INLINEPTX_PREFETCH ++ "Use INLINEPTX prefetch" OFF "USE_HW_CACHES" OFF) ++cmake_dependent_option(USE_HW_CACHES_LIBCALL ++ "Prefetch via library call" OFF "USE_HW_CACHES" OFF) ++cmake_dependent_option(USE_HW_CACHES_VOLALOAD ++ "Prefetch via load instructions" OFF "USE_HW_CACHES" OFF) ++ ++# // Use a single writeback function when using caches ++cmake_dependent_option(USE_HW_CACHES_SINGLEWRITEBACK ++ "" OFF "USE_HW_CACHES" OFF) ++cmake_dependent_option(USE_HW_CACHES_INDWRITEBACK_LIBCALL ++ "" ON "USE_HW_CACHES" ON) ++cmake_dependent_option(USE_HW_CACHES_INDWRITEBACK_INLINE ++ "" OFF "USE_HW_CACHES" OFF) ++ ++# // --------------------------------------- ++# // ---- CODE GENERATION CONFIGURATION ---- ++# // --------------------------------------- ++ ++# // Allow exporting of PREMized functions through external linkage (useful for ++# // compiling PREMized library functions). 
++option(ULES_EXTERNAL_LINKAGE "Allow exporting of PREMized functions through external linkage (useful for compiling PREMized library functions)" ++ OFF) ++ ++# // Inlining of specialized functions ++option(ALWAYS_INLINE_UNSPECIALIZED "" ON) ++option(ALWAYS_INLINE_LOAD "" ON) ++option(ALWAYS_INLINE_EXECUTE "" ON) ++option(ALWAYS_INLINE_STORE "" ON) ++ ++option(AGGRESSIVELY_INLINE_CALL_TREE "Inlining of call trees in Channel Arg Insertion" ON) ++ ++# // ------------------------------------------------- ++# // ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ---- ++# // ------------------------------------------------- ++ ++option(USE_PREM_INIT_FINI "Synchronization" ON) ++ ++option(NEVER_USE_SOFTDMA "Use the new DMA-like loading of data for Load and/or Store phases" ON) ++ ++# // Scheduling ++option(SCHEDULE_COMPATIBLE "" OFF) ++option(SCHEDULE_LES "" ON) ++option(SCHEDULE_COMBINED "" OFF) ++ ++set(NUM_THREADS_LES_IN_COMBINED 512 CACHE STRING "COMBINED schedule only: Number of threads to use for Specialized") ++ ++set(CACHE_LINE_SIZE 128 CACHE STRING "The size in bytes of a cache line (128 bytes on TX1)") ++ ++option(NEVER_INLINE_SYNC "Inlining of synchronization functions" OFF) ++ ++configure_file(include/Config/Options.h.in include/Config/Options.h) + + add_subdirectory(src) +--- a/HerculesCompiler/llvm-passes/include/Config/Options.h ++++ /dev/null +@@ -1,147 +0,0 @@ +- +-// ---------------------------- +-// ---- CORE CONFIGURATION ---- +-// ---------------------------- +- +-// Amount of bytes available in the SPM or cache +-// SPM (TX1, TX2) = 48000 +-// CACHE TX1 CPU = 524288 +-// CACHE TX1 GPU = 262144 +-// CACHE TX2 GPU = 524288 +-// CACHE TX2 CPU = 524288 +-#define CPU_CACHE_SIZE_DEFAULT 524288 +-#define GPU_CACHE_SIZE_DEFAULT 524288 +-#define GPU_SCRATCHPAD_SIZE_DEFAULT 48000 +- +-// Use hardware cache prefetches in Specialized function, in place of SPM-based. +-//#define USE_HW_CACHES +- +-// Use hierarchical PREM intervals for loops. 
+-#define HIERARCHICAL_INTERVALS +- +-// ----------------------------- +-// ---- CACHE CONFIGURATION ---- +-// ----------------------------- +- +-// If we use hardware caches, we can either use load or prefetch instructions +-// to bring the data local. +-#define PREFETCH_REPS 1 +-#define USE_HW_CACHES_PREFETCH +-//#define USE_HW_CACHES_INLINEPTX_PREFETCH +-//#define USE_HW_CACHES_LIBCALL +-//#define USE_HW_CACHES_VOLALOAD +- +-// Use a single writeback function when using caches +-//#define USE_HW_CACHES_SINGLEWRITEBACK +-#define USE_HW_CACHES_INDWRITEBACK_LIBCALL +-//#define USE_HW_CACHES_INDWRITEBACK_INLINE +- +-// --------------------------------------- +-// ---- CODE GENERATION CONFIGURATION ---- +-// --------------------------------------- +- +-// Allow exporting of PREMized functions through external linkage (useful for +-// compiling PREMized library functions). +-//#define ULES_EXTERNAL_LINKAGE +- +-// Inlining of specialized functions +-#define ALWAYS_INLINE_UNSPECIALIZED +-#define ALWAYS_INLINE_LOAD +-#define ALWAYS_INLINE_EXECUTE +-#define ALWAYS_INLINE_STORE +- +-// Inlining of call trees in Channel Arg Insertion +-#define AGGRESSIVELY_INLINE_CALL_TREE +- +-// ------------------------------------------------- +-// ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ---- +-// ------------------------------------------------- +- +-// Synchronization +-#define USE_PREM_INIT_FINI +- +-// Use the new DMA-like loading of data for Load and/or Store phases +-#define NEVER_USE_SOFTDMA +- +-// Scheduling +-//#define SCHEDULE_COMPATIBLE +-#define SCHEDULE_LES +-//#define SCHEDULE_COMBINED +- +-// COMBINED schedule only: Number of threads to use for Specialized +-#define NUM_THREADS_LES_IN_COMBINED 512 +- +-// The size in bytes of a cache line (128 bytes on TX1) +-#define CACHE_LINE_SIZE 128 +- +-// Inlining of synchronization functions +-//#define NEVER_INLINE_SYNC +- +-// -------------------------------------------------------------------------- // +-// ---- DO NOT 
CHANGE BELOW THIS LINE ---- AUTOGENERATION BASED ON ABOVE ---- // +-// -------------------------------------------------------------------------- // +- +-// Check that schedule is sane. +-#if defined(SCHEDULE_COMPATIBLE) && \ +- (defined(SCHEDULE_LES) || defined(SCHEDULE_COMBINED)) +-#error Multiple schedules defined! +-#endif +-#if defined(SCHEDULE_LES) && \ +- (defined(SCHEDULE_COMPATIBLE) || defined(SCHEDULE_COMBINED)) +-#error Multiple schedules defined! +-#endif +-#if defined(SCHEDULE_COMBINED) && \ +- (defined(SCHEDULE_LES) || defined(SCHEDULE_COMPATIBLE)) +-#error Multiple schedules defined! +-#endif +-#if !defined(SCHEDULE_COMPATIBLE) && !defined(SCHEDULE_LES) && \ +- !defined(SCHEDULE_COMBINED) +-#error No schedule defined! +-#endif +- +-// Check that the HW CACHE config is sane. +-#ifdef USE_HW_CACHES +-#define DONT_SPECIALIZE_EXECUTE +-//# if defined(USE_HW_CACHES_PREFETCH) && defined(USE_HW_CACHES_VOLALOAD) +-//# error Using both volatile loads and prefetches for HW CACHE mode. +-//# endif +-#if !defined(USE_HW_CACHES_PREFETCH) && !defined(USE_HW_CACHES_VOLALOAD) && \ +- !defined(USE_HW_CACHES_INLINEPTX_PREFETCH) && \ +- !defined(USE_HW_CACHES_LIBCALL) +-#error No policy for HW caches defined! +-#endif +-#if defined(USE_HW_CACHES_INLINEPTX_PREFETCH) && \ +- (defined(USE_HW_CACHES_PREFETCH) || defined(USE_HW_CACHES_VOLALOAD)) +-#error Multiple cache policies defined! +-#endif +-#if defined(USE_HW_CACHES_PREFETCH) && \ +- (defined(USE_HW_CACHES_INLINEPTX_PREFETCH) || \ +- defined(USE_HW_CACHES_VOLALOAD)) +-#error Multiple cache policies defined! +-#endif +-#if defined(USE_HW_CACHES_VOLALOAD) && \ +- (defined(USE_HW_CACHES_PREFETCH) || \ +- defined(USE_HW_CACHES_INLINEPTX_PREFETCH)) +-#error Multiple cache policies defined! +-#endif +- +-#if defined(USE_HW_CACHES_SINGLEWRITEBACK) && \ +- defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) && \ +- defined(USE_HW_CACHES_INDWRITEBACK_INLINE) +-#error Multiple cache writeback policies! 
+-#elif defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) && \ +- defined(USE_HW_CACHES_INDWRITEBACK_INLINE) +-#error Multiple cache writeback policies! +-#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) && \ +- defined(USE_HW_CACHES_INDWRITEBACK_INLINE) +-#error Multiple cache writeback policies! +-#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) && \ +- defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) +-#error Multiple cache writeback policies! +-#endif +-#endif +- +-// Warning +-#ifdef EMPTY_COMPUTE_PHASE +-#warning Empty compute phase! +-#endif +--- /dev/null ++++ b/HerculesCompiler/llvm-passes/include/Config/Options.h.in +@@ -0,0 +1,147 @@ ++ ++// ---------------------------- ++// ---- CORE CONFIGURATION ---- ++// ---------------------------- ++ ++// Amount of bytes available in the SPM or cache ++// SPM (TX1, TX2) = 48000 ++// CACHE TX1 CPU = 524288 ++// CACHE TX1 GPU = 262144 ++// CACHE TX2 GPU = 524288 ++// CACHE TX2 CPU = 524288 ++#cmakedefine CPU_CACHE_SIZE_DEFAULT @CPU_CACHE_SIZE_DEFAULT@ ++#cmakedefine GPU_CACHE_SIZE_DEFAULT @GPU_CACHE_SIZE_DEFAULT@ ++#cmakedefine GPU_SCRATCHPAD_SIZE_DEFAULT @GPU_SCRATCHPAD_SIZE_DEFAULT@ ++ ++// Use hardware cache prefetches in Specialized function, in place of SPM-based. ++#cmakedefine USE_HW_CACHES ++ ++// Use hierarchical PREM intervals for loops. ++#cmakedefine HIERARCHICAL_INTERVALS ++ ++// ----------------------------- ++// ---- CACHE CONFIGURATION ---- ++// ----------------------------- ++ ++// If we use hardware caches, we can either use load or prefetch instructions ++// to bring the data local. 
++#cmakedefine PREFETCH_REPS @PREFETCH_REPS@ ++#cmakedefine USE_HW_CACHES_PREFETCH ++#cmakedefine USE_HW_CACHES_INLINEPTX_PREFETCH ++#cmakedefine USE_HW_CACHES_LIBCALL ++#cmakedefine USE_HW_CACHES_VOLALOAD ++ ++// Use a single writeback function when using caches ++#cmakedefine USE_HW_CACHES_SINGLEWRITEBACK ++#cmakedefine USE_HW_CACHES_INDWRITEBACK_LIBCALL ++#cmakedefine USE_HW_CACHES_INDWRITEBACK_INLINE ++ ++// --------------------------------------- ++// ---- CODE GENERATION CONFIGURATION ---- ++// --------------------------------------- ++ ++// Allow exporting of PREMized functions through external linkage (useful for ++// compiling PREMized library functions). ++#cmakedefine ULES_EXTERNAL_LINKAGE ++ ++// Inlining of specialized functions ++#cmakedefine ALWAYS_INLINE_UNSPECIALIZED ++#cmakedefine ALWAYS_INLINE_LOAD ++#cmakedefine ALWAYS_INLINE_EXECUTE ++#cmakedefine ALWAYS_INLINE_STORE ++ ++// Inlining of call trees in Channel Arg Insertion ++#cmakedefine AGGRESSIVELY_INLINE_CALL_TREE ++ ++// ------------------------------------------------- ++// ---- ADVANCED OPTIONS ---- DEFAULTS ARE SANE ---- ++// ------------------------------------------------- ++ ++// Synchronization ++#cmakedefine USE_PREM_INIT_FINI ++ ++// Use the new DMA-like loading of data for Load and/or Store phases ++#cmakedefine NEVER_USE_SOFTDMA ++ ++// Scheduling ++#cmakedefine SCHEDULE_COMPATIBLE ++#cmakedefine SCHEDULE_LES ++#cmakedefine SCHEDULE_COMBINED ++ ++// COMBINED schedule only: Number of threads to use for Specialized ++#cmakedefine NUM_THREADS_LES_IN_COMBINED @NUM_THREADS_LES_IN_COMBINED@ ++ ++// The size in bytes of a cache line (128 bytes on TX1) ++#cmakedefine CACHE_LINE_SIZE @CACHE_LINE_SIZE@ ++ ++// Inlining of synchronization functions ++#cmakedefine NEVER_INLINE_SYNC ++ ++// -------------------------------------------------------------------------- // ++// ---- DO NOT CHANGE BELOW THIS LINE ---- AUTOGENERATION BASED ON ABOVE ---- // ++// 
-------------------------------------------------------------------------- // ++ ++// Check that schedule is sane. ++#if defined(SCHEDULE_COMPATIBLE) && \ ++ (defined(SCHEDULE_LES) || defined(SCHEDULE_COMBINED)) ++#error Multiple schedules defined! ++#endif ++#if defined(SCHEDULE_LES) && \ ++ (defined(SCHEDULE_COMPATIBLE) || defined(SCHEDULE_COMBINED)) ++#error Multiple schedules defined! ++#endif ++#if defined(SCHEDULE_COMBINED) && \ ++ (defined(SCHEDULE_LES) || defined(SCHEDULE_COMPATIBLE)) ++#error Multiple schedules defined! ++#endif ++#if !defined(SCHEDULE_COMPATIBLE) && !defined(SCHEDULE_LES) && \ ++ !defined(SCHEDULE_COMBINED) ++#error No schedule defined! ++#endif ++ ++// Check that the HW CACHE config is sane. ++#ifdef USE_HW_CACHES ++#define DONT_SPECIALIZE_EXECUTE ++//# if defined(USE_HW_CACHES_PREFETCH) && defined(USE_HW_CACHES_VOLALOAD) ++//# error Using both volatile loads and prefetches for HW CACHE mode. ++//# endif ++#if !defined(USE_HW_CACHES_PREFETCH) && !defined(USE_HW_CACHES_VOLALOAD) && \ ++ !defined(USE_HW_CACHES_INLINEPTX_PREFETCH) && \ ++ !defined(USE_HW_CACHES_LIBCALL) ++#error No policy for HW caches defined! ++#endif ++#if defined(USE_HW_CACHES_INLINEPTX_PREFETCH) && \ ++ (defined(USE_HW_CACHES_PREFETCH) || defined(USE_HW_CACHES_VOLALOAD)) ++#error Multiple cache policies defined! ++#endif ++#if defined(USE_HW_CACHES_PREFETCH) && \ ++ (defined(USE_HW_CACHES_INLINEPTX_PREFETCH) || \ ++ defined(USE_HW_CACHES_VOLALOAD)) ++#error Multiple cache policies defined! ++#endif ++#if defined(USE_HW_CACHES_VOLALOAD) && \ ++ (defined(USE_HW_CACHES_PREFETCH) || \ ++ defined(USE_HW_CACHES_INLINEPTX_PREFETCH)) ++#error Multiple cache policies defined! ++#endif ++ ++#if defined(USE_HW_CACHES_SINGLEWRITEBACK) && \ ++ defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) && \ ++ defined(USE_HW_CACHES_INDWRITEBACK_INLINE) ++#error Multiple cache writeback policies! 
++#elif defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) && \ ++ defined(USE_HW_CACHES_INDWRITEBACK_INLINE) ++#error Multiple cache writeback policies! ++#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) && \ ++ defined(USE_HW_CACHES_INDWRITEBACK_INLINE) ++#error Multiple cache writeback policies! ++#elif defined(USE_HW_CACHES_SINGLEWRITEBACK) && \ ++ defined(USE_HW_CACHES_INDWRITEBACK_LIBCALL) ++#error Multiple cache writeback policies! ++#endif ++#endif ++ ++// Warning ++#ifdef EMPTY_COMPUTE_PHASE ++#warning Empty compute phase! ++#endif diff --git a/debian/patches/series b/debian/patches/series index 434a9e0..601aa6a 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -1,4 +1,4 @@ -configuration.patch +libpremnotify-makefile.patch Guard-architecture-dependent-code-with-#ifdefs.patch require-cuda.patch -libpremnotify-makefile.patch +configure-herculescompiler-via-cmake.patch