From 1dd2e10c6a4365a69b75350d8d43167ee250efad Mon Sep 17 00:00:00 2001 From: Flavio Kreiliger Date: Fri, 18 Jan 2019 16:21:00 +0100 Subject: [PATCH] Added patches to compile gguard kernel module * Reverted main configuration to default of hercules-public * Adjusted paths to correctly link to gpuguard-hyper --- Makefile | 12 +-- debian/patches/gguard-hyper.patch | 150 ++++++++++++++++++++++++++++++ debian/patches/series | 1 + 3 files changed, 157 insertions(+), 6 deletions(-) create mode 100644 debian/patches/gguard-hyper.patch diff --git a/Makefile b/Makefile index 96794c3..9682860 100644 --- a/Makefile +++ b/Makefile @@ -83,7 +83,7 @@ build_passes/build.ninja: | build_passes $(TMP_DESTDIR)/$(PREFIX)/lib/cmake/llvm -DCPU_CACHE_SIZE_DEFAULT=524288 \ -DGPU_CACHE_SIZE_DEFAULT=524288 \ -DGPU_SCRATCHPAD_SIZE_DEFAULT=48000 \ - -DUSE_HW_CACHES=ON \ + -DUSE_HW_CACHES=OFF \ -DHIERARCHICAL_INTERVALS=ON \ -DPREFETCH_REPS=1 \ -DUSE_HW_CACHES_PREFETCH=ON \ @@ -94,11 +94,11 @@ build_passes/build.ninja: | build_passes $(TMP_DESTDIR)/$(PREFIX)/lib/cmake/llvm -DUSE_HW_CACHES_INDWRITEBACK_LIBCALL=ON \ -DUSE_HW_CACHES_INDWRITEBACK_INLINE=OFF \ -DULES_EXTERNAL_LINKAGE=OFF \ - -DALWAYS_INLINE_UNSPECIALIZED=OFF \ - -DALWAYS_INLINE_LOAD=OFF \ - -DALWAYS_INLINE_EXECUTE=OFF \ - -DALWAYS_INLINE_STORE=OFF \ - -DAGGRESSIVELY_INLINE_CALL_TREE=OFF \ + -DALWAYS_INLINE_UNSPECIALIZED=ON \ + -DALWAYS_INLINE_LOAD=ON \ + -DALWAYS_INLINE_EXECUTE=ON \ + -DALWAYS_INLINE_STORE=ON \ + -DAGGRESSIVELY_INLINE_CALL_TREE=ON \ -G "Ninja" $(CURDIR)/HerculesCompiler/llvm-passes passes: build_passes/build.ninja diff --git a/debian/patches/gguard-hyper.patch b/debian/patches/gguard-hyper.patch new file mode 100644 index 0000000..cbbc8b8 --- /dev/null +++ b/debian/patches/gguard-hyper.patch @@ -0,0 +1,150 @@ +--- a/HerculesCompiler/gpuguard-hyper/Makefile ++++ b/HerculesCompiler/gpuguard-hyper/Makefile +@@ -1,9 +1,13 @@ ++export ARCH=arm64 ++export CROSS_COMPILE=/opt/OSELAS.Toolchain-2014.12.2/aarch64-v8a-linux-gnu/gcc-4.9.2-glibc-2.20-binutils-2.24-kernel-3.16-sanitized/bin/aarch64-v8a-linux-gnu- ++ ++ + obj-m += gguard.o + + ccflags-y := -DCONFIG_MULTI_CACHE + + all: +- make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules ++ make -C /home/kreilfla/jailhouse/jailhouse-build/build/kernel-4.4 M=$(PWD) modules + + clean: +- make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean ++ make -C /home/kreilfla/jailhouse/jailhouse-build/build/kernel-4.4 M=$(PWD) clean +--- a/HerculesCompiler/gpuguard-hyper/gguard.c ++++ b/HerculesCompiler/gpuguard-hyper/gguard.c +@@ -13,8 +13,7 @@ + * + * Cache flushing: + * https://www.kernel.org/doc/Documentation/cachetlb.txt +- * +- * Kernel timers: ++ * * Kernel timers: + * https://www.kernel.org/doc/Documentation/timers/timers-howto.txt + * + * ----------------------------------------------------------------------------- +@@ -113,7 +112,7 @@ + //#define COUNT_CACHE_MISSES + //#define PRINT_CACHE_MISSES_PER_PHASE + //#define REPORT_MEM_OVERRUNS_IN_CACHE_MISSES +-//#define HYPERCALL_3ARG ++#define HYPERCALL_3ARG + //#define GG_PERFORM_SYNC_ONLY + #define IGNORE_WC_INTERRUPT_LATENCY + #define MAX_LATENCY_TO_BUDGET_FOR 12000 // in nano seconds +@@ -319,7 +318,6 @@ + + static long gg_prem_mtex_req(uint64_t phase, uint64_t budget, + uint64_t timeout, uint64_t period, uint64_t flags) { +- + #ifdef HYPERCALL_3ARG + + // For backwards compatibility with the 3-argument interface to the +@@ -327,8 +325,8 @@ + + register uint64_t num_result asm("x0") = 9; + register uint64_t __arg1 asm("x1") = phase; +- register uint64_t __arg2 asm("x2") = budget; +- register uint64_t __arg3 asm("x3") = period; ++ register uint64_t __arg2 asm("x2") = 0; ++ register uint64_t __arg3 asm("x3") = 0; + + asm volatile( + "hvc #0x4a48" +@@ -1147,6 +1145,11 @@ + gg_stat_comp_cache_misses / gg_stat_entercomp); + printk(KERN_INFO " --- End of STATS --- \n"); + #endif ++ gg_sync_zero_mem = ((gg_sync_zero_mem * 100) / gg_stat_entermem); ++ ++ // Give the compute phase misses in percent of total (memory phase in ++ // else clause of preprocessor macro above). ++ gg_sync_zero_comp = ((gg_sync_zero_comp * 100) / gg_stat_entercomp); + + // We are going to return the gg_sync_zero_comp and gg_sync_zero_mem in + // a compressed 16-bit format. Thus we need to handle overflows. We do +@@ -1203,8 +1206,6 @@ + result = gg_prem_mtex_req(hpar->phase, hpar->memory_budget, + hpar->timeout, hpar->period, + hpar->flags); +- +- // Return the result to user space. + return result; + + } else { +@@ -1446,6 +1447,7 @@ + + // Enter a Compute phase when we finish execution, as we do not + // want to hog memory for all future. ++#ifndef GG_PERFORM_SYNC_ONLY + if(gg_prem_mtex_req(PREM_COMPUTE, + UINT64_MAX, + UINT64_MAX, +@@ -1453,7 +1455,7 @@ + PREM_PERIODIC) != 0) { + GG_BUG("PREM MTEX HYPERCALL FAILED", GG_STATE_UNREACHABLE); + } +- ++#endif + // Reached end of GPU program. + GG_STATE_CHECK(GG_STATE_POSTRUN); + +--- a/HerculesCompiler/gpuguard-hyper/omp_interface/gpuguard-ompif.h ++++ b/HerculesCompiler/gpuguard-hyper/omp_interface/gpuguard-ompif.h +@@ -26,7 +26,7 @@ + /** + * Location of the GPUguard device node for communicating with the LKM. + */ +-#define GGUARD_DEV_PATH "/home/bjoernf/opt/dev/gguard-lkm" ++#define GGUARD_DEV_PATH "/dev/gguard-lkm" + + // ----------------------- + // -- GPUguard class -- +--- a/HerculesCompiler/gpuguard-hyper/omp_interface/hypercall-if.c ++++ b/HerculesCompiler/gpuguard-hyper/omp_interface/hypercall-if.c +@@ -35,7 +35,7 @@ + + void PREM_PROTECT() { + if(stacked == 0) { +- prem_mtex_reg(PREM_MEMORY, -1, -1, -1, PREM_PERIODIC); ++ prem_mtex_reg(PREM_MEMORY, 0, 0, 0, PREM_PERIODIC); + } + stacked++; + } +@@ -43,6 +43,6 @@ + void PREM_UNPROTECT() { + stacked--; + if(stacked == 0) { +- prem_mtex_reg(PREM_COMPUTE, -1, -1, -1, PREM_PERIODIC); ++ prem_mtex_reg(PREM_COMPUTE, 0, 0, 0, PREM_PERIODIC); + } + } +--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp ++++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp +@@ -119,7 +119,7 @@ + + // bjoernf + #ifdef USE_GPUGUARD +- #include "../../../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/gpuguard-ompif.h" // GPUguard ++ #include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/gpuguard-ompif.h" // GPUguard + #include // For reading and writing timer config files. + #include // For zero padding log output (time). + #include // For real-time scheduling +--- a/openmp/libomptarget/src/omptarget.cpp ++++ b/openmp/libomptarget/src/omptarget.cpp +@@ -30,8 +30,8 @@ + // Header file global to this project + #include "omptarget.h" + #ifdef USE_GPUGUARD +-#include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.h" +-#include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.c" ++#include "../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.h" ++#include "../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.c" + #else + #define PREM_PROTECT() + #define PREM_UNPROTECT() diff --git a/debian/patches/series b/debian/patches/series index fd95367..2eec7cc 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -3,3 +3,4 @@ require-cuda.patch configure-herculescompiler-via-cmake.patch default-pass-root.patch Guard-architecture-dependent-code-with-#ifdefs.patch +gguard-hyper.patch -- 2.39.2