From 1dd2e10c6a4365a69b75350d8d43167ee250efad Mon Sep 17 00:00:00 2001
From: Flavio Kreiliger <kreilfla@fel.cvut.cz>
Date: Fri, 18 Jan 2019 16:21:00 +0100
Subject: [PATCH] Added patches to compile gguard kernel module

* Reverted main configuration to default of hercules-public
* Adjusted paths to correctly link to gpuguard-hyper

---
 Makefile                          |  12 +--
 debian/patches/gguard-hyper.patch | 150 ++++++++++++++++++++++++++++++
 debian/patches/series             |   1 +
 3 files changed, 157 insertions(+), 6 deletions(-)
 create mode 100644 debian/patches/gguard-hyper.patch

diff --git a/Makefile b/Makefile
index 96794c3..9682860 100644
--- a/Makefile
+++ b/Makefile
@@ -83,7 +83,7 @@ build_passes/build.ninja: | build_passes $(TMP_DESTDIR)/$(PREFIX)/lib/cmake/llvm
 		-DCPU_CACHE_SIZE_DEFAULT=524288 \
 		-DGPU_CACHE_SIZE_DEFAULT=524288 \
 		-DGPU_SCRATCHPAD_SIZE_DEFAULT=48000 \
-		-DUSE_HW_CACHES=ON \
+		-DUSE_HW_CACHES=OFF \
 		-DHIERARCHICAL_INTERVALS=ON \
 		-DPREFETCH_REPS=1 \
 		-DUSE_HW_CACHES_PREFETCH=ON \
@@ -94,11 +94,11 @@ build_passes/build.ninja: | build_passes $(TMP_DESTDIR)/$(PREFIX)/lib/cmake/llvm
 		-DUSE_HW_CACHES_INDWRITEBACK_LIBCALL=ON \
 		-DUSE_HW_CACHES_INDWRITEBACK_INLINE=OFF \
 		-DULES_EXTERNAL_LINKAGE=OFF \
-		-DALWAYS_INLINE_UNSPECIALIZED=OFF \
-		-DALWAYS_INLINE_LOAD=OFF \
-		-DALWAYS_INLINE_EXECUTE=OFF \
-		-DALWAYS_INLINE_STORE=OFF \
-		-DAGGRESSIVELY_INLINE_CALL_TREE=OFF \
+		-DALWAYS_INLINE_UNSPECIALIZED=ON \
+		-DALWAYS_INLINE_LOAD=ON \
+		-DALWAYS_INLINE_EXECUTE=ON \
+		-DALWAYS_INLINE_STORE=ON \
+		-DAGGRESSIVELY_INLINE_CALL_TREE=ON \
 		-G "Ninja" $(CURDIR)/HerculesCompiler/llvm-passes
 
 passes: build_passes/build.ninja
diff --git a/debian/patches/gguard-hyper.patch b/debian/patches/gguard-hyper.patch
new file mode 100644
index 0000000..cbbc8b8
--- /dev/null
+++ b/debian/patches/gguard-hyper.patch
@@ -0,0 +1,150 @@
+--- a/HerculesCompiler/gpuguard-hyper/Makefile
++++ b/HerculesCompiler/gpuguard-hyper/Makefile
+@@ -1,9 +1,13 @@
++export ARCH=arm64
++export CROSS_COMPILE=/opt/OSELAS.Toolchain-2014.12.2/aarch64-v8a-linux-gnu/gcc-4.9.2-glibc-2.20-binutils-2.24-kernel-3.16-sanitized/bin/aarch64-v8a-linux-gnu-
++
++
+ obj-m += gguard.o
+ 
+ ccflags-y := -DCONFIG_MULTI_CACHE
+ 
+ all:
+-	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
++	    make -C /home/kreilfla/jailhouse/jailhouse-build/build/kernel-4.4 M=$(PWD) modules
+ 
+ clean:
+-	make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
++	    make -C /home/kreilfla/jailhouse/jailhouse-build/build/kernel-4.4 M=$(PWD) clean
+--- a/HerculesCompiler/gpuguard-hyper/gguard.c
++++ b/HerculesCompiler/gpuguard-hyper/gguard.c
+@@ -13,8 +13,7 @@
+  *
+  * Cache flushing:
+  *      https://www.kernel.org/doc/Documentation/cachetlb.txt
+- *
+- * Kernel timers:
++ * Kernel timers:
+  *      https://www.kernel.org/doc/Documentation/timers/timers-howto.txt
+  *
+  * -----------------------------------------------------------------------------
+@@ -113,7 +112,7 @@
+ //#define COUNT_CACHE_MISSES
+ //#define PRINT_CACHE_MISSES_PER_PHASE
+ //#define REPORT_MEM_OVERRUNS_IN_CACHE_MISSES
+-//#define HYPERCALL_3ARG
++#define HYPERCALL_3ARG
+ //#define GG_PERFORM_SYNC_ONLY
+ #define IGNORE_WC_INTERRUPT_LATENCY
+ #define MAX_LATENCY_TO_BUDGET_FOR   12000       // in nano seconds
+@@ -319,7 +318,6 @@
+ 
+ static long gg_prem_mtex_req(uint64_t phase, uint64_t budget,
+         uint64_t timeout, uint64_t period, uint64_t flags) {
+-
+ #ifdef HYPERCALL_3ARG
+ 
+     // For backwards compatibility with the 3-argument interface to the 
+@@ -327,8 +325,8 @@
+ 
+     register uint64_t num_result asm("x0") = 9;
+     register uint64_t __arg1 asm("x1") = phase;
+-    register uint64_t __arg2 asm("x2") = budget;
+-    register uint64_t __arg3 asm("x3") = period;
++    register uint64_t __arg2 asm("x2") = 0;
++    register uint64_t __arg3 asm("x3") = 0;
+ 
+     asm volatile(
+             "hvc #0x4a48"
+@@ -1147,6 +1145,11 @@
+                 gg_stat_comp_cache_misses / gg_stat_entercomp);
+         printk(KERN_INFO " --- End of STATS --- \n");
+         #endif
++        gg_sync_zero_mem = ((gg_sync_zero_mem * 100) / gg_stat_entermem);
++
++        // Give the compute phase misses in percent of total (memory phase in
++        // else clause of preprocessor macro above).
++        gg_sync_zero_comp = ((gg_sync_zero_comp * 100) / gg_stat_entercomp);
+         
+         // We are going to return the gg_sync_zero_comp and gg_sync_zero_mem in
+         // a compressed 16-bit format. Thus we need to handle overflows. We do 
+@@ -1203,8 +1206,6 @@
+         result = gg_prem_mtex_req(hpar->phase, hpar->memory_budget,
+                          hpar->timeout, hpar->period, 
+                          hpar->flags);
+-
+-        // Return the result to user space.
+         return result;
+ 
+     } else {
+@@ -1446,6 +1447,7 @@
+ 
+                 // Enter a Compute phase when we finish execution, as we do not
+                 // want to hog memory for all future.
++#ifndef GG_PERFORM_SYNC_ONLY
+                 if(gg_prem_mtex_req(PREM_COMPUTE, 
+                                     UINT64_MAX,
+                                     UINT64_MAX,
+@@ -1453,7 +1455,7 @@
+                                     PREM_PERIODIC) != 0) {
+                     GG_BUG("PREM MTEX HYPERCALL FAILED", GG_STATE_UNREACHABLE);
+                 }
+-
++#endif
+                 // Reached end of GPU program.
+                 GG_STATE_CHECK(GG_STATE_POSTRUN);
+         
+--- a/HerculesCompiler/gpuguard-hyper/omp_interface/gpuguard-ompif.h
++++ b/HerculesCompiler/gpuguard-hyper/omp_interface/gpuguard-ompif.h
+@@ -26,7 +26,7 @@
+ /**
+  * Location of the GPUguard device node for communicating with the LKM.
+  */
+-#define GGUARD_DEV_PATH "/home/bjoernf/opt/dev/gguard-lkm"
++#define GGUARD_DEV_PATH "/dev/gguard-lkm"
+ 
+ // -----------------------
+ // -- GPUguard class --
+--- a/HerculesCompiler/gpuguard-hyper/omp_interface/hypercall-if.c
++++ b/HerculesCompiler/gpuguard-hyper/omp_interface/hypercall-if.c
+@@ -35,7 +35,7 @@
+ 
+ void PREM_PROTECT() {
+     if(stacked == 0) {
+-      prem_mtex_reg(PREM_MEMORY, -1, -1, -1, PREM_PERIODIC);
++      prem_mtex_reg(PREM_MEMORY, 0, 0, 0, PREM_PERIODIC);
+     }
+     stacked++;
+ }
+@@ -43,6 +43,6 @@
+ void PREM_UNPROTECT() {
+     stacked--;
+     if(stacked == 0) {
+-      prem_mtex_reg(PREM_COMPUTE, -1, -1, -1, PREM_PERIODIC);
++      prem_mtex_reg(PREM_COMPUTE, 0, 0, 0, PREM_PERIODIC);
+     }
+ }
+--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
++++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+@@ -119,7 +119,7 @@
+ 
+ // bjoernf
+ #ifdef USE_GPUGUARD
+-  #include "../../../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/gpuguard-ompif.h" // GPUguard
++  #include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/gpuguard-ompif.h" // GPUguard
+   #include <fstream> // For reading and writing timer config files.
+   #include <iomanip> // For zero padding log output (time).
+   #include <sched.h> // For real-time scheduling
+--- a/openmp/libomptarget/src/omptarget.cpp
++++ b/openmp/libomptarget/src/omptarget.cpp
+@@ -30,8 +30,8 @@
+ // Header file global to this project
+ #include "omptarget.h"
+ #ifdef USE_GPUGUARD
+-#include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.h"
+-#include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.c"
++#include "../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.h"
++#include "../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.c"
+ #else
+ #define PREM_PROTECT()
+ #define PREM_UNPROTECT()
diff --git a/debian/patches/series b/debian/patches/series
index fd95367..2eec7cc 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -3,3 +3,4 @@ require-cuda.patch
 configure-herculescompiler-via-cmake.patch
 default-pass-root.patch
 Guard-architecture-dependent-code-with-#ifdefs.patch
+gguard-hyper.patch
-- 
2.39.2