rtime.felk.cvut.cz Git - hercules2020/hercules-compiler.git/commitdiff
Use Makefile configuration
author Kreiliger, Flavio <kreilfla@fel.cvut.cz>
Tue, 20 Nov 2018 14:07:01 +0000 (14:07 +0000)
committer Michal Sojka <michal.sojka@cvut.cz>
Tue, 20 Nov 2018 22:36:44 +0000 (23:36 +0100)
Makefile
debian/patches/cuda_arch.patch [new file with mode: 0644]
debian/patches/series
debian/patches/use_gpuguard.patch [new file with mode: 0644]

diff --git a/Makefile b/Makefile
index 424e638fdc721a3ad75d04a39949dbee25737da3..6caf7c0190e0269e5c7c89233c23b8591b4d7884 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -70,8 +70,28 @@ $(TMP_DESTDIR)/$(PREFIX)/lib/cmake/llvm: | llvm-clang
 
 build_passes/build.ninja: | build_passes $(TMP_DESTDIR)/$(PREFIX)/lib/cmake/llvm
        cd $(@D) && cmake -DCMAKE_INSTALL_PREFIX="$(PREFIX)" \
-               -DLLVM_DIR':'STRING=$(TMP_DESTDIR)$(PREFIX)/lib/cmake/llvm -DCMAKE_BUILD_TYPE=DEBUG \
+               -DLLVM_DIR':'STRING=$(TMP_DESTDIR)$(PREFIX)/lib/cmake/llvm \
+               -DCMAKE_BUILD_TYPE=DEBUG \
                $(LLVM_CROSS_FLAGS) -DLLVM_TARGETS_TO_BUILD="X86;ARM" \
+               -DCPU_CACHE_SIZE_DEFAULT=524288 \
+               -DGPU_CACHE_SIZE_DEFAULT=524288 \
+               -DGPU_SCRATCHPAD_SIZE_DEFAULT=48000 \
+               -DUSE_HW_CACHES=ON \
+               -DHIERARCHICAL_INTERVALS=ON \
+               -DPREFETCH_REPS=1 \
+               -DUSE_HW_CACHES_PREFETCH=ON \
+               -DUSE_HW_CACHES_INLINEPTX_PREFETCH=OFF \
+               -DUSE_HW_CACHES_LIBCALL=OFF \
+               -DUSE_HW_CACHES_VOLALOAD=OFF \
+               -DUSE_HW_CACHES_SINGLEWRITEBACK=OFF \
+               -DUSE_HW_CACHES_INDWRITEBACK_LIBCALL=ON \
+               -DUSE_HW_CACHES_INDWRITEBACK_INLINE=OFF \
+               -DULES_EXTERNAL_LINKAGE=OFF \
+               -DALWAYS_INLINE_UNSPECIALIZED=OFF \
+               -DALWAYS_INLINE_LOAD=OFF \
+               -DALWAYS_INLINE_EXECUTE=OFF \
+               -DALWAYS_INLINE_STORE=OFF \
+               -DAGGRESSIVELY_INLINE_CALL_TREE=OFF \
                -G "Ninja" $(CURDIR)/HerculesCompiler/llvm-passes
 
 passes: build_passes/build.ninja
diff --git a/debian/patches/cuda_arch.patch b/debian/patches/cuda_arch.patch
new file mode 100644 (file)
index 0000000..842e368
--- /dev/null
@@ -0,0 +1,33 @@
+Description: Configuration for TX2 according to D33CompilerRuntime.docx 
+
+--- a/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
++++ b/openmp/libomptarget/deviceRTLs/nvptx/CMakeLists.txt
+@@ -66,7 +66,7 @@
+       set(CUDA_ARCH ${CUDA_ARCH} -gencode arch=compute_${sm},code=sm_${sm})
+     endforeach()
+   else()
+-    set(CUDA_ARCH -arch sm_35)
++    set(CUDA_ARCH -arch sm_62)
+   endif()
+   
+   # Activate RTL message dumps if requested by the user.
+@@ -176,7 +176,7 @@
+           set(CUDA_ARCH ${CUDA_ARCH} --cuda-gpu-arch=sm_${sm})
+         endforeach()
+       else()
+-        set(CUDA_ARCH --cuda-gpu-arch=sm_35)
++        set(CUDA_ARCH --cuda-gpu-arch=sm_62)
+       endif()
+       # Compile cuda files to bitcode.
+--- a/clang/lib/Driver/ToolChains.cpp
++++ b/clang/lib/Driver/ToolChains.cpp
+@@ -4967,7 +4967,7 @@
+ // macro for it. Also, select the default PTX version to be used. We use 4.2 for
+ // compute capabilities older than 6.0 and 5.0 otherwise.
+ #ifndef OPENMP_NVPTX_COMPUTE_CAPABILITY
+-#define OPENMP_NVPTX_COMPUTE_CAPABILITY 53
++#define OPENMP_NVPTX_COMPUTE_CAPABILITY 62
+ #endif
+ #if OPENMP_NVPTX_COMPUTE_CAPABILITY < 60
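diff --git a/debian/patches/series b/debian/patches/series
index 601aa6adc5fabe781855700a28e58eafd2695b5c..eb6eecf0e55daaba8acc3d1d25cce07ddbbf9350 100644 (file)
--- a/debian/patches/series
+++ b/debian/patches/series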
@@ -2,3 +2,5 @@ libpremnotify-makefile.patch
 Guard-architecture-dependent-code-with-#ifdefs.patch
 require-cuda.patch
 configure-herculescompiler-via-cmake.patch
+use_gpuguard.patch
+cuda_arch.patch
diff --git a/debian/patches/use_gpuguard.patch b/debian/patches/use_gpuguard.patch
new file mode 100644 (file)
index 0000000..9163cb4
--- /dev/null
@@ -0,0 +1,22 @@
+--- a/HerculesCompiler/libpremnotify/libpremnotify-gpu.cu
++++ b/HerculesCompiler/libpremnotify/libpremnotify-gpu.cu
+@@ -47,7 +47,7 @@
+ // SYNC: We can use GPUguard, synchronization, or (if none of the below macros
+ //       are defined), no sync at all.
+ // Use GPUguard for synchronization
+-#define USE_GPUGUARD
++// #define USE_GPUGUARD
+ // Use barriers for synchronization
+ #define USE_BARRIERS
+--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
++++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+@@ -19,7 +19,7 @@
+ // kernel, and the LKM will be invoked for synchronization. If it is not 
+ // defined, the offloading will be done as in the original libomptarget CUDA
+ // rtl.
+-#define USE_GPUGUARD
++// #define USE_GPUGUARD
+ // After each kernel, finalize the results to get output in deadline misses and
+ // enable the GPUguard LKM to output more statistics to the kernel log (if