-DCPU_CACHE_SIZE_DEFAULT=524288 \
-DGPU_CACHE_SIZE_DEFAULT=524288 \
-DGPU_SCRATCHPAD_SIZE_DEFAULT=48000 \
- -DUSE_HW_CACHES=ON \
+ -DUSE_HW_CACHES=OFF \
-DHIERARCHICAL_INTERVALS=ON \
-DPREFETCH_REPS=1 \
-DUSE_HW_CACHES_PREFETCH=ON \
-DUSE_HW_CACHES_INDWRITEBACK_LIBCALL=ON \
-DUSE_HW_CACHES_INDWRITEBACK_INLINE=OFF \
-DULES_EXTERNAL_LINKAGE=OFF \
- -DALWAYS_INLINE_UNSPECIALIZED=OFF \
- -DALWAYS_INLINE_LOAD=OFF \
- -DALWAYS_INLINE_EXECUTE=OFF \
- -DALWAYS_INLINE_STORE=OFF \
- -DAGGRESSIVELY_INLINE_CALL_TREE=OFF \
+ -DALWAYS_INLINE_UNSPECIALIZED=ON \
+ -DALWAYS_INLINE_LOAD=ON \
+ -DALWAYS_INLINE_EXECUTE=ON \
+ -DALWAYS_INLINE_STORE=ON \
+ -DAGGRESSIVELY_INLINE_CALL_TREE=ON \
-G "Ninja" $(CURDIR)/HerculesCompiler/llvm-passes
passes: build_passes/build.ninja
--- /dev/null
+--- a/HerculesCompiler/gpuguard-hyper/Makefile
++++ b/HerculesCompiler/gpuguard-hyper/Makefile
+@@ -1,9 +1,13 @@
++export ARCH=arm64
++export CROSS_COMPILE=/opt/OSELAS.Toolchain-2014.12.2/aarch64-v8a-linux-gnu/gcc-4.9.2-glibc-2.20-binutils-2.24-kernel-3.16-sanitized/bin/aarch64-v8a-linux-gnu-
++
++
+ obj-m += gguard.o
+
+ ccflags-y := -DCONFIG_MULTI_CACHE
+
+ all:
+- make -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules
++ make -C /home/kreilfla/jailhouse/jailhouse-build/build/kernel-4.4 M=$(PWD) modules
+
+ clean:
+- make -C /lib/modules/$(shell uname -r)/build M=$(PWD) clean
++ make -C /home/kreilfla/jailhouse/jailhouse-build/build/kernel-4.4 M=$(PWD) clean
+--- a/HerculesCompiler/gpuguard-hyper/gguard.c
++++ b/HerculesCompiler/gpuguard-hyper/gguard.c
+@@ -13,8 +13,7 @@
+ *
+ * Cache flushing:
+ * https://www.kernel.org/doc/Documentation/cachetlb.txt
+- *
+- * Kernel timers:
++ * * Kernel timers:
+ * https://www.kernel.org/doc/Documentation/timers/timers-howto.txt
+ *
+ * -----------------------------------------------------------------------------
+@@ -113,7 +112,7 @@
+ //#define COUNT_CACHE_MISSES
+ //#define PRINT_CACHE_MISSES_PER_PHASE
+ //#define REPORT_MEM_OVERRUNS_IN_CACHE_MISSES
+-//#define HYPERCALL_3ARG
++#define HYPERCALL_3ARG
+ //#define GG_PERFORM_SYNC_ONLY
+ #define IGNORE_WC_INTERRUPT_LATENCY
+ #define MAX_LATENCY_TO_BUDGET_FOR 12000 // in nano seconds
+@@ -319,7 +318,6 @@
+
+ static long gg_prem_mtex_req(uint64_t phase, uint64_t budget,
+ uint64_t timeout, uint64_t period, uint64_t flags) {
+-
+ #ifdef HYPERCALL_3ARG
+
+ // For backwards compatibility with the 3-argument interface to the
+@@ -327,8 +325,8 @@
+
+ register uint64_t num_result asm("x0") = 9;
+ register uint64_t __arg1 asm("x1") = phase;
+- register uint64_t __arg2 asm("x2") = budget;
+- register uint64_t __arg3 asm("x3") = period;
++ register uint64_t __arg2 asm("x2") = 0;
++ register uint64_t __arg3 asm("x3") = 0;
+
+ asm volatile(
+ "hvc #0x4a48"
+@@ -1147,6 +1145,11 @@
+ gg_stat_comp_cache_misses / gg_stat_entercomp);
+ printk(KERN_INFO " --- End of STATS --- \n");
+ #endif
++ gg_sync_zero_mem = ((gg_sync_zero_mem * 100) / gg_stat_entermem);
++
++ // Give the compute phase misses in percent of total (memory phase in
++ // else clause of preprocessor macro above).
++ gg_sync_zero_comp = ((gg_sync_zero_comp * 100) / gg_stat_entercomp);
+
+ // We are going to return the gg_sync_zero_comp and gg_sync_zero_mem in
+ // a compressed 16-bit format. Thus we need to handle overflows. We do
+@@ -1203,8 +1206,6 @@
+ result = gg_prem_mtex_req(hpar->phase, hpar->memory_budget,
+ hpar->timeout, hpar->period,
+ hpar->flags);
+-
+- // Return the result to user space.
+ return result;
+
+ } else {
+@@ -1446,6 +1447,7 @@
+
+ // Enter a Compute phase when we finish execution, as we do not
+ // want to hog memory for all future.
++#ifndef GG_PERFORM_SYNC_ONLY
+ if(gg_prem_mtex_req(PREM_COMPUTE,
+ UINT64_MAX,
+ UINT64_MAX,
+@@ -1453,7 +1455,7 @@
+ PREM_PERIODIC) != 0) {
+ GG_BUG("PREM MTEX HYPERCALL FAILED", GG_STATE_UNREACHABLE);
+ }
+-
++#endif
+ // Reached end of GPU program.
+ GG_STATE_CHECK(GG_STATE_POSTRUN);
+
+--- a/HerculesCompiler/gpuguard-hyper/omp_interface/gpuguard-ompif.h
++++ b/HerculesCompiler/gpuguard-hyper/omp_interface/gpuguard-ompif.h
+@@ -26,7 +26,7 @@
+ /**
+ * Location of the GPUguard device node for communicating with the LKM.
+ */
+-#define GGUARD_DEV_PATH "/home/bjoernf/opt/dev/gguard-lkm"
++#define GGUARD_DEV_PATH "/dev/gguard-lkm"
+
+ // -----------------------
+ // -- GPUguard class --
+--- a/HerculesCompiler/gpuguard-hyper/omp_interface/hypercall-if.c
++++ b/HerculesCompiler/gpuguard-hyper/omp_interface/hypercall-if.c
+@@ -35,7 +35,7 @@
+
+ void PREM_PROTECT() {
+ if(stacked == 0) {
+- prem_mtex_reg(PREM_MEMORY, -1, -1, -1, PREM_PERIODIC);
++ prem_mtex_reg(PREM_MEMORY, 0, 0, 0, PREM_PERIODIC);
+ }
+ stacked++;
+ }
+@@ -43,6 +43,6 @@
+ void PREM_UNPROTECT() {
+ stacked--;
+ if(stacked == 0) {
+- prem_mtex_reg(PREM_COMPUTE, -1, -1, -1, PREM_PERIODIC);
++ prem_mtex_reg(PREM_COMPUTE, 0, 0, 0, PREM_PERIODIC);
+ }
+ }
+--- a/openmp/libomptarget/plugins/cuda/src/rtl.cpp
++++ b/openmp/libomptarget/plugins/cuda/src/rtl.cpp
+@@ -119,7 +119,7 @@
+
+ // bjoernf
+ #ifdef USE_GPUGUARD
+- #include "../../../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/gpuguard-ompif.h" // GPUguard
++ #include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/gpuguard-ompif.h" // GPUguard
+ #include <fstream> // For reading and writing timer config files.
+ #include <iomanip> // For zero padding log output (time).
+ #include <sched.h> // For real-time scheduling
+--- a/openmp/libomptarget/src/omptarget.cpp
++++ b/openmp/libomptarget/src/omptarget.cpp
+@@ -30,8 +30,8 @@
+ // Header file global to this project
+ #include "omptarget.h"
+ #ifdef USE_GPUGUARD
+-#include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.h"
+-#include "../../../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.c"
++#include "../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.h"
++#include "../../../HerculesCompiler-public/gpuguard-hyper/omp_interface/hypercall-if.c"
+ #else
+ #define PREM_PROTECT()
+ #define PREM_UNPROTECT()